]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[cleanup] Misc
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
14f25df2 1import base64
f59f5ef8 2import contextlib
54007a45 3import http.cookiejar
982ee69a
MB
4import json
5import os
9bd13fe5 6import re
982ee69a 7import shutil
982ee69a
MB
8import struct
9import subprocess
10import sys
11import tempfile
2e4585da 12import time
982ee69a 13from datetime import datetime, timedelta, timezone
f59f5ef8 14from enum import Enum, auto
982ee69a
MB
15from hashlib import pbkdf2_hmac
16
1d3586d0 17from .aes import (
18 aes_cbc_decrypt_bytes,
19 aes_gcm_decrypt_and_verify_bytes,
20 unpad_pkcs7,
21)
9b8ee23b 22from .dependencies import (
23 _SECRETSTORAGE_UNAVAILABLE_REASON,
24 secretstorage,
25 sqlite3,
26)
97ec5bc5 27from .minicurses import MultilinePrinter, QuietMultilinePrinter
d2c8aadf 28from .utils import (
29 Popen,
30 YoutubeDLCookieJar,
31 error_to_str,
32 expand_path,
33 try_call,
34)
982ee69a 35
982ee69a
MB
# Browser names that are dispatched to the shared Chromium cookie extractor
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name that extract_cookies_from_browser knows how to handle
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
38
39
class YDLLogger:
    """Logger facade that forwards messages to a wrapped ``ydl`` object.

    Every logging method is a silent no-op when no (or a falsy) ``ydl``
    object was supplied.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward ``message`` to ``ydl.write_debug``."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Forward ``message`` to ``ydl.to_screen`` with a ``[Cookies]`` prefix."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward ``message`` and ``only_once`` to ``ydl.report_warning``."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward ``message`` to ``ydl.report_error``."""
        if not self._ydl:
            return
        self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        # Minimum number of seconds between two printed updates, and the time of the last one
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            """Print ``message`` on line 0 unless the previous update was under ``_DELAY`` seconds ago."""
            if time.time() - self._timer > self._DELAY:
                self.print_at_line(f'[Cookies] {message}', 0)
                self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        # Do not print to files/pipes, loggers, or when --no-progress is used
        ydl = self._ydl
        if not ydl or ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None
        file = ydl._out_files.error
        try:
            is_terminal = file.isatty()
        except BaseException:
            # Any failure to query the stream is treated as "not a terminal"
            return None
        if not is_terminal:
            return None
        return self.ProgressBar(file, preserve_output=False)
97ec5bc5 80
81
def _create_progress_bar(logger):
    """Return the progress printer offered by ``logger``, or a silent stand-in.

    The stand-in is a ``QuietMultilinePrinter`` whose ``print`` ignores its argument;
    it is used when ``logger`` has no ``progress_bar`` attribute or that call returns
    a falsy value.
    """
    printer = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not printer:
        printer = QuietMultilinePrinter()
        printer.print = lambda _: None
    return printer
90
982ee69a
MB
91
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file            Cookie file (path or other object accepted by
                                  YoutubeDLCookieJar), or None
    @param browser_specification  Arguments for _parse_browser_specification, or None
    @param ydl                    Object wrapped in a YDLLogger for reporting
    @returns                      The result of _merge_cookie_jars over the collected jars
    """
    collected = []

    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
        collected.append(browser_jar)

    if cookie_file is not None:
        is_filename = YoutubeDLCookieJar.is_path(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # A path that cannot be read is not loaded, but its (empty) jar is still collected
        unreadable_path = is_filename and not os.access(cookie_file, os.R_OK)
        if not unreadable_path:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        collected.append(file_jar)

    return _merge_cookie_jars(collected)
110
111
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch to the cookie extractor matching ``browser_name``.

    ``container`` is only passed to the firefox extractor and ``keyring`` only to the
    Chromium-based one.

    @raises ValueError  if ``browser_name`` is not a known browser
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
121
122
def _extract_firefox_cookies(profile, container, logger):
    """Read cookies from a copy of a firefox ``cookies.sqlite`` database.

    @param profile    None (search the default firefox directory), a path, or a name
                      joined onto the default firefox directory
    @param container  None (all cookies), 'none' (only cookies outside any container),
                      or a container identifier looked up in containers.json
    @param logger     Logger used for progress and diagnostics
    @returns          YoutubeDLCookieJar (empty when sqlite3 is unavailable)
    @raises FileNotFoundError  if the database or containers.json cannot be found/read
    @raises ValueError         if the requested container is not in containers.json
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        # JSON text is UTF-8; relying on the locale default encoding can fail to decode
        # non-ASCII container names
        with open(containers_path, encoding='utf-8') as containers:
            identities = json.load(containers).get('identities', [])
        # A container matches on its "name", or on its whole "l10nID" when that has the
        # form userContext<...>.label (`.group()` yields the complete matched string)
        container_id = next((context.get('userContextId') for context in identities if container in (
            context.get('name'),
            try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
        )), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")')
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            # Close the connection even when reading failed part-way
            if cursor is not None:
                cursor.connection.close()
189
190
def _firefox_browser_dir():
    """Return the platform-specific directory that is searched for firefox profiles."""
    platform = sys.platform
    if platform in ('cygwin', 'win32'):
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    return os.path.expanduser('~/.mozilla/firefox')
982ee69a
MB
197
198
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a Chromium-based browser.

    @returns  dict with the keys 'browser_dir', 'keyring_name' and 'supports_profiles'
    @raises KeyError  if ``browser_name`` is not one of the known browsers
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('cygwin', 'win32'):
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        base, relative = {
            'brave': (appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (appdata_local, R'Google\Chrome\User Data'),
            'chromium': (appdata_local, R'Chromium\User Data'),
            'edge': (appdata_local, R'Microsoft\Edge\User Data'),
            'opera': (appdata_roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (appdata_local, R'Vivaldi\User Data'),
        }[browser_name]

    elif platform == 'darwin':
        base = os.path.expanduser('~/Library/Application Support')
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]

    else:
        base = _config_home()
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]

    browser_dir = os.path.join(base, relative)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }[browser_name]

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles
    }
253
254
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt cookies from a copy of a Chromium-based browser's ``Cookies`` database.

    @param browser_name  One of the Chromium-based browser names
    @param profile       None, a path, or a profile name inside the browser directory
    @param keyring       Keyring name passed on to get_cookie_decryptor
    @param logger        Logger used for progress and diagnostics
    @returns             YoutubeDLCookieJar (empty when sqlite3 is unavailable)
    @raises FileNotFoundError  if no cookies database is found
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # The browser root handed to the decryptor is the parent of a profile path
        config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
    elif config['supports_profiles']:
        search_root = os.path.join(config['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = config['browser_dir']

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Text columns are fetched as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total_cookie_count = len(rows)
                for i, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            # Close the connection even when reading failed part-way
            if cursor is not None:
                cursor.connection.close()
319
320
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the Chromium ``cookies`` table into a cookie object.

    The text columns arrive as bytes and are decoded here. ``decryptor.decrypt`` is only
    consulted when the plain ``value`` column is empty and ``encrypted_value`` is not.

    @returns  (is_encrypted, cookie); cookie is None when decryption yields None
    """
    domain = host_key.decode()
    cookie_name = name.decode()
    cookie_value = value.decode()
    cookie_path = path.decode()
    is_encrypted = not cookie_value and encrypted_value

    if is_encrypted:
        cookie_value = decryptor.decrypt(encrypted_value)
        if cookie_value is None:
            return is_encrypted, None

    cookie = http.cookiejar.Cookie(
        version=0, name=cookie_name, value=cookie_value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=cookie_path, path_specified=bool(cookie_path), secure=is_secure, expires=expires_utc,
        discard=False, comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
338
339
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class of the per-platform cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Per-version counters; each subclass replaces this with its own dict in __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value, or None if it cannot be decrypted."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 370
982ee69a 371
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor for the current platform.

    ``browser_root`` is only used on Windows, ``browser_keyring_name`` on macOS and Linux,
    and ``keyring`` only on Linux.
    """
    platform = sys.platform
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
982ee69a
MB
378
379
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for ``v10`` and ``v11`` prefixed cookie values on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 values use a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        # v11 values use a key derived from the keyring password, when one is available
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = None if password is None else self.derive_key(password)
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive a 16 byte key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a value by its 3 byte version prefix; None for unknown prefixes or a missing key."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        self._cookie_counts['other'] += 1
        return None
412
413
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for ``v10`` prefixed cookie values on macOS."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = None if password is None else self.derive_key(password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive a 16 byte key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a ``v10`` value (None without a key); any other value is returned unchanged."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
444
445
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for cookie values on Windows (AES-GCM for ``v10``, DPAPI otherwise)."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Decrypt a value by its 3 byte version prefix.

        @returns  The decrypted value, or None when no v10 key is available or the
                  DPAPI helper reports failure
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the version prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): _decrypt_windows_dpapi is defined outside this view; this guard assumes
            # it signals failure with None, like the other decrypt paths - confirm.
            # Without it, a None result would raise AttributeError on .decode()
            return None if plaintext is None else plaintext.decode()
982ee69a
MB
481
482
def _extract_safari_cookies(profile, logger):
    """Read and parse the Safari ``Cookies.binarycookies`` file.

    @param profile  Must be None; anything else is reported as an error and then ignored
    @returns        The jar produced by parse_safari_cookies
    @raises ValueError         when not running on macOS
    @raises FileNotFoundError  when neither known cookie location exists
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')

    if not os.path.isfile(cookies_path):
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookies_file:
        raw_cookies = cookies_file.read()

    jar = parse_safari_cookies(raw_cookies, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
503
504
class ParserError(Exception):
    """Raised by DataParser on invalid reads/skips or unexpected input."""
507
508
class DataParser:
    """Sequential reader over an in-memory binary buffer.

    ``cursor`` is the offset of the next unread byte. Skipped bytes are reported
    through ``logger.debug``.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor.

        @raises ParserError  for a negative count or a read past the end of the data
        """
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and raise ParserError if they differ."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little-endian unless ``big_endian``)."""
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little-endian unless ``big_endian``)."""
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        """Read a NUL-terminated string, decode it and move the cursor past the NUL.

        @raises ParserError         if no terminator is found (the cursor is left at the end)
        @raises UnicodeDecodeError  if the bytes cannot be decoded (the cursor has already
                                    moved past the terminator)
        """
        # Locate the terminator in one scan instead of reading one byte at a time
        terminator = self._data.find(b'\x00', self.cursor)
        if terminator < 0:
            self.cursor = len(self._data)
            raise ParserError('reached end of input')
        raw = self._data[self.cursor:terminator]
        self.cursor = terminator + 1
        return raw.decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes, logging them; zero is a no-op, negative raises ParserError."""
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor ends up at ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the buffer."""
        self.skip_to(len(self._data), description)
558
559
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
562
563
def _parse_safari_cookies_header(data, logger):
    """Parse the file header of a Safari cookies database.

    @returns  (page_sizes, offset of the first byte after the header)
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    number_of_pages = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(number_of_pages):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
570
571
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a Safari cookies database and add its cookies to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = []
    for _ in range(number_of_cookies):
        record_offsets.append(parser.read_uint())
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for i, record_offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(record_offset, 'space between records')
            # The record is parsed from its own slice; the page parser then steps over it
            record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
            parser.read_bytes(record_length)
    parser.skip_to_end('space in between pages')
590
591
def _parse_safari_cookies_record(data, jar, logger):
    """Parse a single cookie record starting at ``data[0]`` and add it to ``jar``.

    A record whose strings cannot be decoded is skipped with a warning.

    @returns  The record size read from the record header
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # The creation date is not used, but it must be consumed to advance the parser
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        parser.skip_to(domain_offset)
        domain = parser.read_cstring()

        parser.skip_to(name_offset)
        name = parser.read_cstring()

        parser.skip_to(path_offset)
        path = parser.read_cstring()

        parser.skip_to(value_offset)
        value = parser.read_cstring()
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
632
633
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari cookies database into ``jar`` (a new
    YoutubeDLCookieJar when none is given) and return it.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    if jar is None:
        jar = YoutubeDLCookieJar()
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for page_size in page_sizes:
        page = body.read_bytes(page_size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
649
650
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished when choosing a keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    OTHER = auto()
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
982ee69a
MB
663
664
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends a password can be requested from.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()
673
674
# Names of all _LinuxKeyring members
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
676
677
def _get_linux_desktop_environment(env):
    """
    Determine the desktop environment from a mapping of environment variables.

    XDG_CURRENT_DESKTOP (first ':'-separated entry) is used when it is set, otherwise
    DESKTOP_SESSION, otherwise the GNOME/KDE session marker variables.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP')
    desktop_session = env.get('DESKTOP_SESSION')

    if xdg_current_desktop is not None:
        desktop = xdg_current_desktop.split(':')[0].strip()
        if desktop == 'Unity':
            is_gnome_fallback = desktop_session is not None and 'gnome-fallback' in desktop_session
            return _LinuxDesktopEnvironment.GNOME if is_gnome_fallback else _LinuxDesktopEnvironment.UNITY
        return {
            'GNOME': _LinuxDesktopEnvironment.GNOME,
            'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
            'KDE': _LinuxDesktopEnvironment.KDE,
            'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
            'XFCE': _LinuxDesktopEnvironment.XFCE,
        }.get(desktop, _LinuxDesktopEnvironment.OTHER)

    if desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE
        return _LinuxDesktopEnvironment.OTHER

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    if 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
716
717
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the detected desktop environment:
    KDE -> KWALLET, OTHER -> BASICTEXT, anything else -> GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
732
733
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Falls back to 'kdewallet' when the dbus query fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet'
    ]
    try:
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        logger.debug(f'NetworkWallet = "{stdout.strip()}"')
        return stdout.strip()
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
761
762
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from KWallet using ``kwallet-query``.

    @param browser_keyring_name  Prefix of the wallet entry and folder names
    @param logger                Logger used for diagnostics
    @returns  The password as bytes, or b'' when kwallet-query is missing, fails,
              reports that it failed to read the entry, or raises
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # Each fragment ends with a space so the joined message reads correctly
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this may be a bug as the intended behaviour is to generate a random password and store
                # it, but that doesn't matter here.
                return b''
            else:
                logger.debug('password found')
                return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
803
804
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Fetch the '<browser_keyring_name> Safe Storage' secret from the default secretstorage collection.

    Returns the secret of the first item with a matching label. b'' is returned
    (after logging an error) when secretstorage is unavailable or no item matches.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for entry in collection.get_all_items():
            if entry.get_label() != f'{browser_keyring_name} Safe Storage':
                continue
            return entry.get_secret()

        # Reaching this point means no item carried the expected label
        logger.error('failed to read from keyring')
        return b''
821
822
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Obtain the browser's keyring password from the selected Linux keyring backend.

    `keyring` is the name of a _LinuxKeyring member; when it is falsy the
    backend is picked by _choose_linux_keyring(). Returns whatever the backend
    helper returns, or None for the BASICTEXT backend.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {keyring}'
841
842
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from the macOS keychain.

    Runs `security find-generic-password` for the account `browser_keyring_name`
    and the service '<browser_keyring_name> Safe Storage'.

    Returns the password as bytes with trailing newlines stripped, or None when
    the command exits with a non-zero return code or cannot be run.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        stdout, _, returncode = Popen.run(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if returncode:
            # A failed lookup leaves stdout empty; do not pass that off as an empty password
            logger.warning('find-generic-password failed')
            return None
        return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
856
857
def _get_windows_v10_key(browser_root, logger):
    """Load and decrypt the key stored under os_crypt.encrypted_key in 'Local State'.

    The most recently used 'Local State' file below `browser_root` is parsed as
    JSON, its key is base64-decoded and, once the b'DPAPI' prefix is removed,
    handed to _decrypt_windows_dpapi().

    Returns the result of _decrypt_windows_dpapi(), or None (after logging an
    error) when the file, the key entry or the prefix is missing.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    key_blob = base64.b64decode(encoded_key)
    dpapi_marker = b'DPAPI'
    if key_blob.startswith(dpapi_marker):
        return _decrypt_windows_dpapi(key_blob[len(dpapi_marker):], logger)

    logger.error('invalid key')
    return None
877
878
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive `key_length` bytes from `password` and `salt` using PBKDF2-HMAC-SHA1."""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
881
882
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """Decrypt `ciphertext` with AES-CBC, strip the PKCS#7 padding and decode the result.

    Returns the decoded text, or None (with a one-time warning) when the
    decrypted bytes cannot be decoded.
    """
    decrypted = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    plaintext = unpad_pkcs7(decrypted)
    try:
        text = plaintext.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
890
891
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """Decrypt and verify `ciphertext` with AES-GCM, then decode the plaintext.

    Returns the decoded text, or None (with a one-time warning) when
    verification or decoding fails.
    """
    try:
        plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    # Kept as a separate step: UnicodeDecodeError is itself a ValueError and
    # must not be reported as a MAC failure
    try:
        text = plaintext.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
904
905
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling CryptUnprotectData from crypt32 through ctypes.

    Returns the decrypted data as bytes, or None (after a one-time warning)
    when CryptUnprotectData reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    # Imported locally; ctypes.windll is only used inside this function
    import ctypes
    import ctypes.wintypes

    # Length + pointer pair used for both the input and the output of the call
    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    # Copy the ciphertext into a ctypes buffer that the input blob can point at
    buffer = ctypes.create_string_buffer(ciphertext)
    blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
    # Empty blob; filled in by CryptUnprotectData
    blob_out = DATA_BLOB()
    ret = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(blob_in),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(blob_out)  # pDataOut
    )
    # A falsy return value is treated as failure
    if not ret:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # Copy the output into a Python bytes object before releasing the output buffer
    result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
    ctypes.windll.kernel32.LocalFree(blob_out.pbData)
    return result
938
939
940def _config_home():
941 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
942
943
944def _open_database_copy(database_path, tmpdir):
945 # cannot open sqlite databases if they are already in use (e.g. by the browser)
946 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
947 shutil.copy(database_path, database_copy_path)
948 conn = sqlite3.connect(database_copy_path)
949 return conn.cursor()
950
951
952def _get_column_names(cursor, table_name):
86e5f3ed 953 table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
0f06bcd7 954 return [row[1].decode() for row in table_info]
982ee69a
MB
955
956
def _find_most_recently_used_file(root, filename, logger):
    """Walk `root` and return the path named `filename` with the newest modification time.

    Progress is reported through the progress bar created from `logger`.
    Returns None when no file with that name is found.
    """
    # if there are multiple browser profiles, take the most recently used one
    matches, searched = [], 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _subdirs, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry != filename:
                    continue
                matches.append(os.path.join(directory, entry))

    if not matches:
        return None

    def _modified_at(candidate):
        return os.lstat(candidate).st_mtime

    return max(matches, key=_modified_at)
968
969
def _merge_cookie_jars(jars):
    """Combine the cookies of all `jars` into a new YoutubeDLCookieJar.

    The merged jar takes the filename of the last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source in jars:
        for cookie in source:
            merged.set_cookie(cookie)
        if source.filename is None:
            continue
        merged.filename = source.filename
    return merged
978
979
980def _is_path(value):
981 return os.path.sep in value
982
983
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """Validate a browser specification and return it as a 4-tuple.

    Raises ValueError for a browser not in SUPPORTED_BROWSERS or a keyring that
    is neither None nor in SUPPORTED_KEYRINGS. A profile that looks like a path
    has its leading '~' expanded.

    Returns (browser_name, profile, keyring, container).
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    accepted_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in accepted_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    looks_like_path = profile is not None and _is_path(profile)
    if looks_like_path:
        profile = os.path.expanduser(profile)

    return browser_name, profile, keyring, container