]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[jsinterp] Improve separating regex
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
14f25df2 1import base64
f59f5ef8 2import contextlib
54007a45 3import http.cookiejar
8817a80d 4import http.cookies
982ee69a
MB
5import json
6import os
9bd13fe5 7import re
982ee69a 8import shutil
982ee69a
MB
9import struct
10import subprocess
11import sys
12import tempfile
2e4585da 13import time
982ee69a 14from datetime import datetime, timedelta, timezone
f59f5ef8 15from enum import Enum, auto
982ee69a
MB
16from hashlib import pbkdf2_hmac
17
1d3586d0 18from .aes import (
19 aes_cbc_decrypt_bytes,
20 aes_gcm_decrypt_and_verify_bytes,
21 unpad_pkcs7,
22)
9b8ee23b 23from .dependencies import (
24 _SECRETSTORAGE_UNAVAILABLE_REASON,
25 secretstorage,
26 sqlite3,
27)
97ec5bc5 28from .minicurses import MultilinePrinter, QuietMultilinePrinter
d2c8aadf 29from .utils import (
30 Popen,
31 YoutubeDLCookieJar,
32 error_to_str,
33 expand_path,
22df97f9 34 is_path_like,
d2c8aadf 35 try_call,
36)
982ee69a 37
982ee69a
MB
# Browser names that are routed to the Chromium cookie extraction path.
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name accepted by extract_cookies_from_browser().
SUPPORTED_BROWSERS = {*CHROMIUM_BASED_BROWSERS, 'firefox', 'safari'}
40
41
class YDLLogger:
    """Logger that forwards cookie-extraction messages to an optional `ydl` object.

    When no `ydl` object was supplied, every logging method silently does nothing.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward `message` to `ydl.write_debug`."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Forward `message`, prefixed with "[Cookies] ", to `ydl.to_screen`."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward `message` and the `only_once` flag to `ydl.report_warning`."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward `message` to `ydl.report_error`."""
        if not self._ydl:
            return
        self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        """Printer that rewrites line 0, skipping updates that arrive within `_DELAY` seconds."""

        _DELAY = 0.1
        _timer = 0

        def print(self, message):
            # Throttle: drop the message if the previous one was printed too recently
            if time.time() - self._timer <= self._DELAY:
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        ydl = self._ydl
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not ydl or ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None
        stream = ydl._out_files.error
        try:
            is_terminal = stream.isatty()
        except BaseException:
            return None
        if not is_terminal:
            return None
        return self.ProgressBar(stream, preserve_output=False)
97ec5bc5 82
83
def _create_progress_bar(logger):
    """Return the logger's progress printer, or a silent stand-in.

    The logger's `progress_bar()` method is optional; when it is missing or
    returns a falsy value, a QuietMultilinePrinter whose `print` discards its
    argument is returned instead.
    """
    printer = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not printer:
        printer = QuietMultilinePrinter()
        printer.print = lambda _: None
    return printer
92
982ee69a
MB
93
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file            Path-like or other object accepted by YoutubeDLCookieJar, or None
    @param browser_specification  Arguments for _parse_browser_specification, or None
    @param ydl                    Object used to construct the YDLLogger for browser extraction
    @returns                      The result of merging all collected jars
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
        jars.append(browser_jar)

    if cookie_file is not None:
        is_filename = is_path_like(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # A path is only loaded when it is readable; anything else is always loaded
        if not is_filename or os.access(cookie_file, os.R_OK):
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
112
113
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch to the extractor matching `browser_name`.

    `container` is only passed to the firefox extractor and `keyring` only to
    the Chromium one.

    @raises ValueError  if `browser_name` is not firefox, safari or a Chromium based browser
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
123
124
def _extract_firefox_cookies(profile, container, logger):
    """Read cookies from a firefox profile's cookies.sqlite into a YoutubeDLCookieJar.

    @param profile    None (search the default firefox directory), a path, or a
                      profile name relative to the default firefox directory
    @param container  None (all cookies), 'none' (only cookies outside any
                      container) or a container name looked up in containers.json
    @param logger     Logger used for progress and diagnostics
    @returns          A YoutubeDLCookieJar (empty if sqlite3 is unavailable)
    @raises FileNotFoundError  if the cookie database or containers.json cannot be found/read
    @raises ValueError         if the requested container is not listed in containers.json
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        # JSON text is UTF-8; do not depend on the locale's default encoding,
        # otherwise non-ASCII container names are mangled on some platforms
        with open(containers_path, encoding='utf-8') as containers:
            identities = json.load(containers).get('identities', [])
        # NOTE(review): .group() yields the whole l10nID (e.g. 'userContextPersonal.label'),
        # not only the captured part - confirm this is the intended comparison
        container_id = next((context.get('userContextId') for context in identities if container in (
            context.get('name'),
            try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
        )), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # Standard SQL string literals use single quotes; double quotes denote
                # identifiers and are only accepted as strings by a legacy SQLite quirk
                cursor.execute(
                    "SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,'userContextId=')")
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            # Always release the copied database, even when extraction fails
            if cursor is not None:
                cursor.connection.close()
191
192
def _firefox_browser_dir():
    """Return the platform-specific directory that is searched for firefox profiles."""
    platform = sys.platform
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    if platform in ('cygwin', 'win32'):
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    return os.path.expanduser('~/.mozilla/firefox')
982ee69a
MB
199
200
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support for a Chromium based browser.

    @param browser_name  One of the keys of the tables below
    @returns             dict with the keys 'browser_dir', 'keyring_name' and 'supports_profiles'
    @raises KeyError     if `browser_name` is not in the tables
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    on_mac = sys.platform == 'darwin'

    if sys.platform in ('cygwin', 'win32'):
        local_appdata = os.path.expandvars('%LOCALAPPDATA%')
        roaming_appdata = os.path.expandvars('%APPDATA%')
        data_dirs = {
            'brave': os.path.join(local_appdata, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': os.path.join(local_appdata, R'Google\Chrome\User Data'),
            'chromium': os.path.join(local_appdata, R'Chromium\User Data'),
            'edge': os.path.join(local_appdata, R'Microsoft\Edge\User Data'),
            'opera': os.path.join(roaming_appdata, R'Opera Software\Opera Stable'),
            'vivaldi': os.path.join(local_appdata, R'Vivaldi\User Data'),
        }
    elif on_mac:
        app_support = os.path.expanduser('~/Library/Application Support')
        data_dirs = {
            'brave': os.path.join(app_support, 'BraveSoftware/Brave-Browser'),
            'chrome': os.path.join(app_support, 'Google/Chrome'),
            'chromium': os.path.join(app_support, 'Chromium'),
            'edge': os.path.join(app_support, 'Microsoft Edge'),
            'opera': os.path.join(app_support, 'com.operasoftware.Opera'),
            'vivaldi': os.path.join(app_support, 'Vivaldi'),
        }
    else:
        config_dir = _config_home()
        data_dirs = {
            'brave': os.path.join(config_dir, 'BraveSoftware/Brave-Browser'),
            'chrome': os.path.join(config_dir, 'google-chrome'),
            'chromium': os.path.join(config_dir, 'chromium'),
            'edge': os.path.join(config_dir, 'microsoft-edge'),
            'opera': os.path.join(config_dir, 'opera'),
            'vivaldi': os.path.join(config_dir, 'vivaldi'),
        }
    browser_dir = data_dirs[browser_name]

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_names = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }
    keyring_name = keyring_names[browser_name]

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles
    }
255
256
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt cookies from a Chromium based browser into a YoutubeDLCookieJar.

    @param browser_name  Name understood by _get_chromium_based_browser_settings
    @param profile       None, a path, or a profile name below the browser directory
    @param keyring       Passed through to get_cookie_decryptor
    @param logger        Logger used for progress and diagnostics
    @returns             A YoutubeDLCookieJar (empty if sqlite3 is unavailable)
    @raises FileNotFoundError  if no cookie database is found
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    browser_dir = settings['browser_dir']
    supports_profiles = settings['supports_profiles']

    if profile is None:
        search_root = browser_dir
    elif _is_path(profile):
        search_root = profile
        # The decryptor receives the directory containing the profile when profiles are supported
        browser_dir = os.path.dirname(profile) if supports_profiles else profile
    elif supports_profiles:
        search_root = os.path.join(browser_dir, profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = browser_dir

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(browser_dir, settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Text columns are returned as bytes and decoded in _process_chrome_cookie
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')

            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total_cookie_count = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)

            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')

            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            # Always release the copied database, even when extraction fails
            if cursor is not None:
                cursor.connection.close()
321
322
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the Chromium `cookies` table into a cookie object.

    The text columns arrive as bytes and are decoded here. The encrypted value
    is only decrypted when the plain `value` column is empty.

    @returns  (is_encrypted, cookie); `cookie` is None when decryption failed
    """
    host_key, name, value, path = (
        column.decode() for column in (host_key, name, value, path))
    is_encrypted = not value and encrypted_value

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    cookie = http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
340
341
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class of the platform specific cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Number of cookies seen per version prefix; subclasses assign their own dict in __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one encrypted cookie value; must be provided by subclasses."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 372
982ee69a 373
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor for the current platform.

    `browser_root` is only used on Windows, `browser_keyring_name` only on
    Mac and Linux, and `keyring` only on Linux.
    """
    platform = sys.platform
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
982ee69a
MB
380
381
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the v10 (fixed key) and v11 (keyring key) cookie formats."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 cookies always use a key derived from this fixed password
        self._v10_key = self.derive_key(b'peanuts')
        keyring_password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = self.derive_key(keyring_password) if keyring_password is not None else None
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from `password`."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None if the cookie cannot be decrypted."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        # Any other prefix is only counted
        self._cookie_counts['other'] += 1
        return None
414
415
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10 cookies whose key is derived from the keyring password."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        keyring_password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = self.derive_key(keyring_password) if keyring_password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from `password`."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted value, None if no key is available, or the input for non-v10 data."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
446
447
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10 (AES-GCM) cookies and for older, purely DPAPI encrypted cookies."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None if the cookie cannot be decrypted."""
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the version prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            decrypted = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): the DPAPI helper is defined elsewhere; if it signals failure with
            # None, report the cookie as undecryptable instead of raising AttributeError
            return None if decrypted is None else decrypted.decode()
982ee69a
MB
483
484
def _extract_safari_cookies(profile, logger):
    """Read the Safari binary cookie file and parse it into a cookie jar.

    @param profile  Must be None; any other value is reported as an error and otherwise ignored
    @raises ValueError         when not running on darwin
    @raises FileNotFoundError  when neither known cookie file location exists
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    cookie_file = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')

    if not os.path.isfile(cookie_file):
        logger.debug('Trying secondary cookie location')
        cookie_file = os.path.expanduser(
            '~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookie_file):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookie_file, 'rb') as stream:
        raw_cookies = stream.read()

    jar = parse_safari_cookies(raw_cookies, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
505
506
class ParserError(Exception):
    """Raised by DataParser when the input cannot be read as requested."""
509
510
class DataParser:
    """Sequential reader over a bytes object, tracking the read position in `cursor`."""

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next `num_bytes` bytes and advance the cursor past them.

        @raises ParserError  if `num_bytes` is negative or reaches past the end of the data
        """
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        """Read as many bytes as `expected_value` has and raise ParserError if they differ."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless `big_endian`)."""
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little endian unless `big_endian`)."""
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        """Read up to and including the next NUL byte and return the decoded text before it.

        @raises ParserError         if no NUL byte follows the cursor
        @raises UnicodeDecodeError  if the bytes cannot be decoded
        """
        # Locate the terminator in one pass rather than reading byte by byte
        terminator = self._data.find(b'\x00', self.cursor)
        if terminator < 0:
            # The remaining input counts as consumed, as it would after a byte-wise scan
            self.cursor = len(self._data)
            raise ParserError('reached end of input')
        raw = self._data[self.cursor:terminator]
        # The cursor moves past the NUL before decoding, so a decode error leaves it advanced
        self.cursor = terminator + 1
        return raw.decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance by `num_bytes`, logging the skipped bytes at debug level.

        @raises ParserError  if `num_bytes` is negative
        """
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        """Advance the cursor to the absolute position `offset`."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Advance the cursor to the end of the data."""
        self.skip_to(len(self._data), description)
560
561
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
564
565
def _parse_safari_cookies_header(data, logger):
    """Parse the file header: signature, page count and one size per page.

    @returns  (page_sizes, offset of the first byte after the header)
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
572
573
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of cookie records and add the cookies to `jar`."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record parser works on its own slice; afterwards this cursor is moved past the record
            record_length = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(record_length)
    parser.skip_to_end('space in between pages')
592
593
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of `data` and add it to `jar`.

    A record whose strings fail to decode is skipped with a warning.

    @returns  The record size read from the record header
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # The creation date is converted but not used; reading it moves the cursor past the field
    _mac_absolute_time_to_posix(parser.read_double())

    try:
        strings = []
        for offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
        domain, name, path, value = strings
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
634
635
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari binary cookie file into `jar`
    (a new YoutubeDLCookieJar when `jar` is None) and return the jar.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    if jar is None:
        jar = YoutubeDLCookieJar()
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
651
652
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished when choosing a keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    # OTHER is what _get_linux_desktop_environment returns when nothing else matches
    OTHER = auto()
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
982ee69a
MB
665
666
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends a cookie encryption password can be looked up in.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    # With BASICTEXT no keyring password is looked up
    BASICTEXT = auto()
675
676
# Names of all _LinuxKeyring members (a live view of the enum's member mapping keys)
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
678
679
def _get_linux_desktop_environment(env):
    """
    Determine the desktop environment from a mapping of environment variables.

    XDG_CURRENT_DESKTOP is consulted first (only its first ':' separated entry),
    then DESKTOP_SESSION, then two legacy session variables.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        first_entry = xdg_current_desktop.split(':')[0].strip()
        if first_entry == 'Unity':
            # Unity running the gnome-fallback session is treated as GNOME
            if desktop_session is not None and 'gnome-fallback' in desktop_session:
                return _LinuxDesktopEnvironment.GNOME
            return _LinuxDesktopEnvironment.UNITY
        by_name = {
            'GNOME': _LinuxDesktopEnvironment.GNOME,
            'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
            'KDE': _LinuxDesktopEnvironment.KDE,
            'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
            'XFCE': _LinuxDesktopEnvironment.XFCE,
        }
        # NOTE(review): an unrecognised XDG_CURRENT_DESKTOP does not fall back to
        # DESKTOP_SESSION here - confirm against the Chromium reference above
        return by_name.get(first_entry, _LinuxDesktopEnvironment.OTHER)

    if desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE
        return _LinuxDesktopEnvironment.OTHER

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    if 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
718
719
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the detected desktop environment:
    KDE uses KWALLET, an unrecognised environment BASICTEXT, everything else GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
734
735
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Falls back to 'kdewallet' when the dbus query fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet'
    ]
    try:
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        logger.debug(f'NetworkWallet = "{stdout.strip()}"')
        return stdout.strip()
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
763
764
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from KWallet using kwallet-query.

    @param browser_keyring_name  Name used to build the entry and folder names
    @param logger                Logger used for diagnostics
    @returns                     The password as bytes; b'' if it cannot be obtained
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # Each fragment ends with a space so the concatenated message reads correctly
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this may be a bug as the intended behaviour is to generate a random password and store
                # it, but that doesn't matter here.
                return b''
            else:
                logger.debug('password found')
                return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
805
806
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Look up the "<browser_keyring_name> Safe Storage" secret in the default secretstorage collection.

    @returns  the secret of the first item whose label matches, or b'' (after logging
              an error) when secretstorage is unavailable or no item matches
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for entry in collection.get_all_items():
            if entry.get_label() == wanted_label:
                return entry.get_secret()

        # every item was inspected without finding the label
        logger.error('failed to read from keyring')
        return b''
823
824
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Fetch the browser's keyring password from the selected Linux keyring backend.

    @param browser_keyring_name  name passed on to the backend-specific lookup
    @param keyring               name of a _LinuxKeyring member, or a falsy value to let
                                 _choose_linux_keyring pick one
    @param logger                object providing debug/warning/error methods
    @returns                     whatever the KWallet / Gnome keyring lookup returns, or None
                                 for the basic text backend
    @raises AssertionError       if the resolved keyring is not one of the handled members
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None

    # raised explicitly instead of `assert False`: assert statements are removed under
    # `python -O`, which would make an unhandled keyring fall through and return None
    raise AssertionError(f'Unknown keyring {keyring}')
843
844
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Ask the `security` command line tool for the browser's "Safe Storage" password.

    @returns  stdout of the command with trailing newlines removed, or None (after a
              warning) when the command exits with a non-zero code or running it raises
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        output, _, exit_code = Popen.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if not exit_code:
            return output.rstrip(b'\n')
        logger.warning('find-generic-password failed')
        return None
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
861
862
863def _get_windows_v10_key(browser_root, logger):
97ec5bc5 864 path = _find_most_recently_used_file(browser_root, 'Local State', logger)
982ee69a
MB
865 if path is None:
866 logger.error('could not find local state file')
867 return None
97ec5bc5 868 logger.debug(f'Found local state file at "{path}"')
86e5f3ed 869 with open(path, encoding='utf8') as f:
982ee69a
MB
870 data = json.load(f)
871 try:
872 base64_key = data['os_crypt']['encrypted_key']
873 except KeyError:
874 logger.error('no encrypted key in Local State')
875 return None
14f25df2 876 encrypted_key = base64.b64decode(base64_key)
982ee69a
MB
877 prefix = b'DPAPI'
878 if not encrypted_key.startswith(prefix):
879 logger.error('invalid key')
880 return None
881 return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger)
882
883
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive key_length bytes from password and salt with PBKDF2 using HMAC-SHA1."""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
886
887
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """Decrypt ciphertext with AES-CBC, strip the PKCS#7 padding and decode the result.

    @returns  the decoded text, or None (after a one-time warning) when the decrypted
              bytes cannot be decoded
    """
    decrypted = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    unpadded = unpad_pkcs7(decrypted)
    try:
        text = unpadded.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        text = None
    return text
895
896
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """Decrypt and verify ciphertext with AES-GCM, then decode the plaintext.

    @returns  the decoded text, or None (after a one-time warning) when the decryption
              helper raises ValueError or the plaintext cannot be decoded
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    # decoding is kept in its own try block: UnicodeDecodeError is a ValueError subclass
    # and must produce a different warning than a failed verification
    try:
        text = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        text = None
    return text
909
910
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt ciphertext by calling CryptUnprotectData from crypt32 through ctypes.

    Returns the decrypted bytes, or None (after a one-time warning) when the call
    returns a falsy value.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', ctypes.wintypes.DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before handing the buffer to LocalFree
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
943
944
def _config_home():
    """Return $XDG_CONFIG_HOME when the variable is set, otherwise the expanded `~/.config`."""
    configured = os.environ.get('XDG_CONFIG_HOME')
    if configured is None:
        return os.path.expanduser('~/.config')
    return configured
947
948
def _open_database_copy(database_path, tmpdir):
    """Copy the database to `temporary.sqlite` inside tmpdir and return a cursor on the copy."""
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    snapshot_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, snapshot_path)
    return sqlite3.connect(snapshot_path).cursor()
955
956
957def _get_column_names(cursor, table_name):
86e5f3ed 958 table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
0f06bcd7 959 return [row[1].decode() for row in table_info]
982ee69a
MB
960
961
def _find_most_recently_used_file(root, filename, logger):
    """Walk root and return the path named filename with the greatest lstat mtime.

    Progress is reported through the printer obtained from _create_progress_bar.

    @returns  the matching path with the largest st_mtime, or None when nothing matches
    """
    # if there are multiple browser profiles, take the most recently used one
    searched, matches = 0, []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _subdirs, names in os.walk(root):
            for name in names:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if name == filename:
                    matches.append(os.path.join(directory, name))

    if not matches:
        return None
    return max(matches, key=lambda candidate: os.lstat(candidate).st_mtime)
973
974
def _merge_cookie_jars(jars):
    """Combine several cookie jars into one new YoutubeDLCookieJar.

    Cookies are copied in iteration order with set_cookie, and the result takes the
    filename of the last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        source_filename = source_jar.filename
        if source_filename is not None:
            merged.filename = source_filename
    return merged
983
984
def _is_path(value):
    """Return True when value contains the platform's path separator."""
    separator = os.path.sep
    return separator in value
987
988
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """Validate a browser specification and normalise its profile.

    @returns            the tuple (browser_name, profile, keyring, container); profile is
                        replaced by its expanded form when that form contains a path separator
    @raises ValueError  if browser_name is not in SUPPORTED_BROWSERS, or keyring is neither
                        None nor one of SUPPORTED_KEYRINGS
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring not in (None, *SUPPORTED_KEYRINGS):
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        expanded_profile = expand_path(profile)
        # only a value that looks like a path is rewritten; plain profile names are kept
        if _is_path(expanded_profile):
            profile = expanded_profile
    return browser_name, profile, keyring, container
8817a80d
SS
997
998
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie"""
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    _LEGAL_KEY_CHARS = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + r"\[\]"

    # Attribute names that apply to the preceding cookie rather than starting a new one
    _RESERVED = {
        "expires",
        "path",
        "comment",
        "domain",
        "max-age",
        "secure",
        "httponly",
        "version",
        "samesite",
    }

    # Reserved attributes that may appear without a value
    _FLAGS = {"secure", "httponly"}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """Load cookies from data, skipping malformed parts instead of rejecting everything.

        Non-string data is passed to SimpleCookie.load unchanged. A string is scanned
        with _COOKIE_PATTERN one `key[=value]` item at a time:
          - an item whose value only matches the 'bad' group is dropped, and attributes
            that follow it are ignored until the next valid cookie
          - a "$"-prefixed key is set to True on the current cookie when the name after
            the "$" is a reserved attribute; otherwise it is ignored
          - a reserved key is stored as an attribute of the current cookie
          - any other key with a value starts a new cookie
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        morsel = None  # cookie that subsequent attributes are attached to
        index = 0
        length = len(data)

        while 0 <= index < length:
            match = self._COOKIE_PATTERN.search(data, index)
            if not match:
                break

            index = match.end(0)
            if match.group("bad"):
                morsel = None
                continue

            key, value = match.group("key", "val")

            if key[0] == "$":
                attribute = key[1:]
                # Morsel.__setitem__ raises CookieError for names that are not reserved
                # attributes; skip those so one stray "$name" cannot abort the whole parse
                if morsel is not None and morsel.isReservedKey(attribute):
                    morsel[attribute] = True
                continue

            lower_key = key.lower()
            if lower_key in self._RESERVED:
                if morsel is None:
                    continue

                if value is None:
                    if lower_key not in self._FLAGS:
                        # a non-flag attribute without a value invalidates the current cookie
                        morsel = None
                        continue
                    value = True
                else:
                    value, _ = self.value_decode(value)

                morsel[key] = value

            elif value is not None:
                morsel = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)
                morsel.set(key, real_value, coded_value)
                self[key] = morsel

            else:
                # a bare, non-reserved key is not a cookie
                morsel = None