]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
Update to ytdl-commit-ed5c44e7
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
14f25df2 1import base64
f59f5ef8 2import contextlib
54007a45 3import http.cookiejar
982ee69a
MB
4import json
5import os
9bd13fe5 6import re
982ee69a 7import shutil
982ee69a
MB
8import struct
9import subprocess
10import sys
11import tempfile
2e4585da 12import time
982ee69a 13from datetime import datetime, timedelta, timezone
f59f5ef8 14from enum import Enum, auto
982ee69a
MB
15from hashlib import pbkdf2_hmac
16
1d3586d0 17from .aes import (
18 aes_cbc_decrypt_bytes,
19 aes_gcm_decrypt_and_verify_bytes,
20 unpad_pkcs7,
21)
9b8ee23b 22from .dependencies import (
23 _SECRETSTORAGE_UNAVAILABLE_REASON,
24 secretstorage,
25 sqlite3,
26)
97ec5bc5 27from .minicurses import MultilinePrinter, QuietMultilinePrinter
9bd13fe5 28from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path, try_call
982ee69a 29
982ee69a
MB
# Browser names that are handled by the shared Chromium extraction path
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name for which cookie extraction is implemented
SUPPORTED_BROWSERS = {*CHROMIUM_BASED_BROWSERS, 'firefox', 'safari'}
32
33
class YDLLogger:
    """Logger that forwards cookie-extraction messages to a YoutubeDL object.

    When constructed without a YoutubeDL object every logging method is a
    no-op and `progress_bar` returns None.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        if self._ydl:
            self._ydl.write_debug(message)

    def info(self, message):
        if self._ydl:
            self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        if self._ydl:
            self._ydl.report_warning(message, only_once)

    def error(self, message):
        if self._ydl:
            self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        # Minimum number of seconds between two printed updates, and the
        # time of the last printed update
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            # Rate-limit output: drop messages that arrive within _DELAY
            # seconds of the previously printed one
            if time.time() - self._timer > self._DELAY:
                self.print_at_line(f'[Cookies] {message}', 0)
                self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        Returns None when no progress output should be shown.
        """
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
            return None
        file = self._ydl._out_files.error
        try:
            if not file.isatty():
                return None
        except Exception:
            # Best effort: a stream that cannot answer isatty() gets no progress bar.
            # Only `Exception` is caught so that KeyboardInterrupt/SystemExit propagate
            return None
        return self.ProgressBar(file, preserve_output=False)
97ec5bc5 74
75
def _create_progress_bar(logger):
    """Return a progress printer for `logger`, falling back to a silent one.

    The logger's own `progress_bar()` result is used when the logger has that
    method and it returns something truthy. Otherwise a QuietMultilinePrinter
    whose `print` discards its argument is returned.
    """
    printer = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not printer:
        printer = QuietMultilinePrinter()
        printer.print = lambda _: None
    return printer
84
982ee69a
MB
85
def load_cookies(cookie_file, browser_specification, ydl):
    """Collect cookies from a browser and/or a cookie file into one jar.

    @param cookie_file            Cookie file (path or other object accepted by
                                  YoutubeDLCookieJar), or None
    @param browser_specification  Arguments for _parse_browser_specification, or None
    @param ydl                    YoutubeDL object used for logging
    @returns                      The result of merging all collected jars
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
        jars.append(browser_jar)

    if cookie_file is not None:
        given_as_path = YoutubeDLCookieJar.is_path(cookie_file)
        if given_as_path:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # Non-path inputs are always loaded; a path is only loaded when it is readable
        should_load = os.access(cookie_file, os.R_OK) if given_as_path else True
        if should_load:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
104
105
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch to the extractor matching `browser_name`.

    `container` is only passed to the firefox extractor and `keyring` only to
    the chromium-based one.

    @raises ValueError  if `browser_name` is not a known browser
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
115
116
def _extract_firefox_cookies(profile, container, logger):
    """Read cookies from a firefox profile's cookies.sqlite into a YoutubeDLCookieJar.

    @param profile    None (search the default firefox directory), a path, or a
                      profile name relative to the default firefox directory
    @param container  None to load all cookies, 'none' to load only cookies that
                      belong to no container, or a container identifier. The
                      identifier is compared against each identity's `name` and
                      against its complete `l10nID` string when that has the form
                      `userContext<...>.label`
    @param logger     Logger object (see YDLLogger)
    @returns          The populated jar; an empty jar if sqlite3 is unavailable
    @raises FileNotFoundError  if the cookie database or containers.json cannot be found/read
    @raises ValueError         if the requested container is not listed in containers.json
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        # JSON text is UTF-8 (RFC 8259); do not depend on the locale's default encoding,
        # which would garble or reject non-ASCII container names on some systems
        with open(containers_path, encoding='utf-8') as containers:
            identities = json.load(containers).get('identities', [])
        container_id = next((context.get('userContextId') for context in identities if container in (
            context.get('name'),
            # group() is the whole match, i.e. the full l10nID such as "userContextXyz.label"
            try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
        )), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # Work on a copy of the database placed in the temporary directory
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                # The ID is matched either at the very end of originAttributes
                # or when followed by another "&"-separated attribute
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # Single quotes: in SQL a double-quoted token is an identifier, and SQLite
                # only falls back to a string literal when that legacy quirk is enabled
                cursor.execute(
                    "SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,'userContextId=')")
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
183
184
def _firefox_browser_dir():
    """Return the directory that is searched for firefox profiles on this platform."""
    platform = sys.platform
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    if platform in ('cygwin', 'win32'):
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    # Every other platform uses the dot-directory in the user's home
    return os.path.expanduser('~/.mozilla/firefox')
982ee69a
MB
191
192
def _get_chromium_based_browser_settings(browser_name):
    """Return platform-dependent settings for a chromium-based browser.

    @param browser_name  One of the names in CHROMIUM_BASED_BROWSERS
    @returns  dict with the keys
              'browser_dir':       the browser's user data directory
              'keyring_name':      name used to look up the browser's key in the OS keyring
              'supports_profiles': False for opera, True for every other browser
    @raises KeyError  if `browser_name` is not a known chromium-based browser
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    on_mac = sys.platform == 'darwin'

    if sys.platform in ('cygwin', 'win32'):
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        # (base directory, path below it) - only opera lives under the roaming directory
        base, relative = {
            'brave': (appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (appdata_local, R'Google\Chrome\User Data'),
            'chromium': (appdata_local, R'Chromium\User Data'),
            'edge': (appdata_local, R'Microsoft\Edge\User Data'),
            'opera': (appdata_roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (appdata_local, R'Vivaldi\User Data'),
        }[browser_name]

    elif on_mac:
        base = os.path.expanduser('~/Library/Application Support')
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]

    else:
        base = _config_home()
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]

    browser_dir = os.path.join(base, relative)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_names = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Chromium',
        'opera': 'Chromium',
        'vivaldi': 'Chrome',
    }
    if on_mac:
        # On macOS these three browsers use a keyring entry under their own name
        keyring_names.update(edge='Microsoft Edge', opera='Opera', vivaldi='Vivaldi')
    keyring_name = keyring_names[browser_name]

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in {'opera'},
    }
247
248
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read (and decrypt where needed) the cookies of a chromium-based browser.

    @param browser_name  One of the names in CHROMIUM_BASED_BROWSERS
    @param profile       None, a path, or a profile name below the browser directory
    @param keyring       Keyring name handed on to get_cookie_decryptor
    @param logger        Logger object (see YDLLogger)
    @returns             The populated YoutubeDLCookieJar; an empty one if sqlite3 is unavailable
    @raises FileNotFoundError  if no "Cookies" database is found below the search root
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)
    has_profiles = config['supports_profiles']

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # An explicit profile path also relocates the browser directory used for decryption
        config['browser_dir'] = os.path.dirname(profile) if has_profiles else profile
    elif has_profiles:
        search_root = os.path.join(config['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = config['browser_dir']

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Return text columns as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            # The secure flag column is called either "is_secure" or "secure"
            if 'is_secure' in _get_column_names(cursor, 'cookies'):
                secure_column = 'is_secure'
            else:
                secure_column = 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            num_failed = 0
            num_unencrypted = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                num_rows = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{num_rows: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        num_failed += 1
                        continue
                    if not is_encrypted:
                        num_unencrypted += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({num_failed} could not be decrypted)' if num_failed > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = num_unencrypted
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
313
314
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the chromium cookies table into a cookie object.

    The text columns are expected as bytes and are decoded here. The value is
    only decrypted when the plain `value` column is empty and `encrypted_value`
    is not.

    @returns  (is_encrypted, cookie) where `is_encrypted` is truthy if decryption
              was attempted and `cookie` is None if decryption failed
    """
    host_key, name, value, path = (column.decode() for column in (host_key, name, value, path))
    is_encrypted = not value and encrypted_value

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)

    if value is None:
        # Only reachable after a failed decryption: a decoded value is never None
        return is_encrypted, None

    cookie = http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
332
333
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Interface of the platform-specific decryptors for chromium cookie values.

    How cookies are stored per platform:

    Linux:
    - a cookie is either v10 or v11
        - v10: AES-CBC encrypted with a fixed key
        - v11: AES-CBC encrypted with an OS protected key (keyring)
        - where the v11 key is stored depends on the active desktop environment [2]

    Mac:
    - a cookie is either v10 or not v10
        - v10: AES-CBC encrypted with an OS protected key (keyring), using more key derivation
               iterations than on linux
        - not v10: 'old data' stored as plaintext

    Windows:
    - a cookie is either v10 or not v10
        - v10: AES-GCM encrypted with a key which is itself encrypted with DPAPI
        - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Number of cookies seen per version prefix
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt `encrypted_value`; every concrete decryptor has to override this."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 364
982ee69a 365
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor for the current platform.

    `browser_root` is only used on Windows, `browser_keyring_name` only on
    macOS and Linux, and `keyring` only on Linux.
    """
    platform = sys.platform
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    # Everything that is neither macOS nor Windows is treated as Linux
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
982ee69a
MB
372
373
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the v10/v11 cookie values written by chromium on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 cookies use a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        # v11 cookies use a key derived from the keyring password, if there is one
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from `password`."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None if it cannot be decrypted.

        The first three bytes of `encrypted_value` are the version prefix.
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version != b'v11':
            # Unknown version prefix
            self._cookie_counts['other'] += 1
            return None

        self._cookie_counts['v11'] += 1
        if self._v11_key is None:
            self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
406
407
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the cookie values written by chromium on macOS."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        # Without a keyring password there is no key and v10 cookies cannot be decrypted
        self._v10_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from `password`."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None if a v10 value cannot be decrypted.

        Values without the v10 prefix are returned unchanged.
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
438
439
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the cookie values written by chromium on Windows."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None if it cannot be decrypted.

        The first three bytes of `encrypted_value` are the version prefix.
        v10 values are AES-GCM encrypted; everything else is passed to DPAPI.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the version prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): assumes the helper reports failure by returning None - confirm.
            # Pass that on as "could not decrypt" instead of raising AttributeError on .decode()
            return None if plaintext is None else plaintext.decode()
982ee69a
MB
475
476
def _extract_safari_cookies(profile, logger):
    """Read the safari cookie store of the current user into a cookie jar.

    @param profile  Must be None; anything else is reported as an error and then ignored
    @param logger   Logger object (see YDLLogger)
    @raises ValueError         if not running on macOS
    @raises FileNotFoundError  if the cookie store exists in neither known location
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    primary_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    if os.path.isfile(primary_path):
        cookies_path = primary_path
    else:
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookies_file:
        raw_cookies = cookies_file.read()

    jar = parse_safari_cookies(raw_cookies, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
497
498
class ParserError(Exception):
    """Raised by DataParser when the input is malformed or ends too early."""
501
502
class DataParser:
    """Sequential reader over a block of binary data.

    `cursor` is the offset of the next unread byte; every read advances it.
    All read errors are reported as ParserError.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next `num_bytes` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        chunk = self._data[start:stop]
        self.cursor = stop
        return chunk

    def expect_bytes(self, expected_value, message):
        """Consume len(expected_value) bytes and fail unless they equal `expected_value`."""
        actual = self.read_bytes(len(expected_value))
        if actual == expected_value:
            return
        raise ParserError(f'unexpected value: {actual} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little-endian unless `big_endian`)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(f'{byte_order}I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little-endian unless `big_endian`)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(f'{byte_order}d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read up to and including the next NUL byte; return the decoded text before it."""
        # iter() with a sentinel keeps reading single bytes until the terminator shows up
        pieces = list(iter(lambda: self.read_bytes(1), b'\x00'))
        return b''.join(pieces).decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance by `num_bytes` bytes, logging the skipped bytes at debug level."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped!r}')

    def skip_to(self, offset, description='unknown'):
        """Advance the cursor to the absolute position `offset`."""
        distance = offset - self.cursor
        self.skip(distance, description)

    def skip_to_end(self, description='unknown'):
        """Advance the cursor to the end of the data."""
        self.skip_to(len(self._data), description)
552
553
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds since 2001-01-01 00:00 UTC into a whole-second POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
556
557
def _parse_safari_cookies_header(data, logger):
    """Parse the header of a safari cookie database.

    @returns  (page_sizes, body_start): the size of every page and the
              offset in `data` at which the first page begins
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
564
565
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a safari cookie database and add its cookies to `jar`."""
    page = DataParser(data, logger)
    page.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = page.read_uint()
    record_offsets = [page.read_uint() for _ in range(number_of_cookies)]
    if not number_of_cookies:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    page.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            page.skip_to(offset, 'space between records')
            # The record parser gets its own view starting at the record; afterwards
            # the page cursor is moved past the record it consumed
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            page.read_bytes(consumed)
    page.skip_to_end('space in between pages')
584
585
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of `data` and add the cookie to `jar`.

    A record whose strings cannot be decoded is skipped with a warning.

    @returns  The size of the record as stated in its first field
    """
    record = DataParser(data, logger)
    record_size = record.read_uint()
    record.skip(4, 'unknown record field 1')
    flags = record.read_uint()
    is_secure = bool(flags & 0x0001)
    record.skip(4, 'unknown record field 2')
    # Offsets of the four strings, relative to the start of the record
    domain_offset, name_offset, path_offset, value_offset = (record.read_uint() for _ in range(4))
    record.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(record.read_double())
    # The creation date is read and converted, but not used
    _mac_absolute_time_to_posix(record.read_double())

    try:
        strings = []
        for offset in (domain_offset, name_offset, path_offset, value_offset):
            record.skip_to(offset)
            strings.append(record.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    record.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
626
627
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the content of a safari cookie database and add all cookies to `jar`
    (a new YoutubeDLCookieJar if None is given). Returns the jar.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure are the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page_data = body.read_bytes(size)
        _parse_safari_cookies_page(page_data, jar, logger)
    body.skip_to_end('footer')
    return jar
643
644
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments that are told apart when choosing a keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    # Anything that is not recognised as one of the environments below
    OTHER = auto()
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
982ee69a
MB
657
658
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Places in which the cookie encryption password can be stored on Linux.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    # No keyring at all
    BASICTEXT = auto()


# The member names of _LinuxKeyring
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
670
671
def _get_linux_desktop_environment(env):
    """
    Work out the desktop environment from the environment variables in `env`.

    XDG_CURRENT_DESKTOP (only its first ":"-separated entry) takes precedence.
    DESKTOP_SESSION is consulted when XDG_CURRENT_DESKTOP is not set, and the
    GNOME_DESKTOP_SESSION_ID / KDE_FULL_SESSION markers when neither is set.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    environments = _LinuxDesktopEnvironment
    current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    session = env.get('DESKTOP_SESSION', None)

    if current_desktop is not None:
        current_desktop = current_desktop.split(':')[0].strip()
        if current_desktop == 'Unity':
            # The gnome fallback session also announces itself as Unity
            is_gnome_fallback = session is not None and 'gnome-fallback' in session
            return environments.GNOME if is_gnome_fallback else environments.UNITY
        return {
            'GNOME': environments.GNOME,
            'X-Cinnamon': environments.CINNAMON,
            'KDE': environments.KDE,
            'Pantheon': environments.PANTHEON,
            'XFCE': environments.XFCE,
        }.get(current_desktop, environments.OTHER)

    if session is not None:
        if session in ('mate', 'gnome'):
            return environments.GNOME
        if 'kde' in session:
            return environments.KDE
        if 'xfce' in session:
            return environments.XFCE
        return environments.OTHER

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return environments.GNOME
    if 'KDE_FULL_SESSION' in env:
        return environments.KDE
    return environments.OTHER
f59f5ef8
MB
710
711
def _choose_linux_keyring(logger):
    """
    Pick the keyring from the detected desktop environment: KWallet for KDE,
    no keyring for unrecognised environments, the Gnome keyring for the rest.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
726
727
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    The name is asked for with `dbus-send`. If that fails in any way a warning
    is logged and 'kdewallet' is returned.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    try:
        command = [
            'dbus-send', '--session', '--print-reply=literal',
            '--dest=org.kde.kwalletd5',
            '/modules/kwalletd5',
            'org.kde.KWallet.networkWallet',
        ]
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        wallet_name = stdout.strip()
        logger.debug(f'NetworkWallet = "{wallet_name}"')
        return wallet_name
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
755
756
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from KWallet using kwallet-query.

    @param browser_keyring_name  Name of the browser as used in the keyring
    @param logger                Logger object (see YDLLogger)
    @returns  The password as bytes; b'' if kwallet-query is missing or fails,
              or if the wallet has no such entry
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this may be a bug as the intended behaviour is to generate a random password and store
                # it, but that doesn't matter here.
                return b''
            else:
                logger.debug('password found')
                # Only the trailing newline(s) are removed from the output
                return stdout.rstrip(b'\n')
    except Exception as e:
        # Best effort: any failure to run kwallet-query is treated as "no password"
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
797
798
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Read the "<browser_keyring_name> Safe Storage" secret from the default secretstorage collection

    Returns the secret of the first item with a matching label.
    b'' is returned (after logging an error) if secretstorage is unavailable
    or no item has the expected label.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    # the Gnome keyring does not seem to organise keys in the same way as KWallet.
    # Watching `dbus-monitor` during startup shows chromium listing all keys and
    # presumably searching that list for its own, so the same is done here.
    # https://github.com/jaraco/keyring/issues/556
    wanted_label = f'{browser_keyring_name} Safe Storage'
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        matching_item = next(
            (entry for entry in collection.get_all_items() if entry.get_label() == wanted_label),
            None)
        if matching_item is not None:
            return matching_item.get_secret()

        logger.error('failed to read from keyring')
        return b''
815
816
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Obtain the browser's keyring password from the selected Linux keyring backend

    `keyring` is the name of a _LinuxKeyring member; if it is falsy,
    the backend is picked by _choose_linux_keyring.

    Returns the password as bytes, or None when the BASICTEXT backend is used.
    Raises AssertionError if the keyring is not one of the handled backends.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    # Raised explicitly instead of `assert False`: assert statements are removed under `python -O`,
    # which would make an unknown keyring silently return None like BASICTEXT
    raise AssertionError(f'Unknown keyring {keyring}')
835
836
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the "<browser_keyring_name> Safe Storage" password using `security find-generic-password`

    Returns the command's stdout with trailing newlines removed,
    or None (after logging a warning) if running the command raises an exception.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        # stderr output and the return code are not inspected
        password, _stderr, _returncode = Popen.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        return password.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
850
851
def _get_windows_v10_key(browser_root, logger):
    """
    Extract and decrypt the cookie encryption key from the browser's "Local State" file

    The most recently modified "Local State" file below `browser_root` is parsed as JSON and
    its base64 encoded os_crypt.encrypted_key entry is decrypted with DPAPI.

    Returns the result of _decrypt_windows_dpapi, or None (after logging an error) if the file
    cannot be found, has no encrypted key, or the key does not start with b'DPAPI'.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    dpapi_prefix = b'DPAPI'
    key_blob = base64.b64decode(encoded_key)
    if key_blob[:len(dpapi_prefix)] != dpapi_prefix:
        logger.error('invalid key')
        return None
    # only the part after the prefix is the DPAPI protected data
    return _decrypt_windows_dpapi(key_blob[len(dpapi_prefix):], logger)
871
872
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of `key_length` bytes using PBKDF2 with HMAC-SHA1"""
    return pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
875
876
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt `ciphertext` with AES-CBC, strip the PKCS#7 padding and decode the result

    Returns the decoded plaintext, or None (after logging a warning once)
    if the decrypted bytes cannot be decoded.
    """
    padded_plaintext = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    decrypted = unpad_pkcs7(padded_plaintext)
    try:
        decoded = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        decoded = None
    return decoded
884
885
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and verify `ciphertext` with AES-GCM and decode the result

    Returns the decoded plaintext, or None (after logging a warning once)
    if verification fails or the decrypted bytes cannot be decoded.
    """
    # The two steps are kept in separate try blocks:
    # UnicodeDecodeError is itself a ValueError and must not be reported as a MAC failure
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        decoded = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        decoded = None
    return decoded
898
899
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` with the Windows DPAPI function CryptUnprotectData

    Returns the decrypted bytes, or None (after logging a warning once) if the call fails.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', ctypes.wintypes.DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()

    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob),  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a bytes object before the output buffer is freed
    decrypted = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return decrypted
932
933
def _config_home():
    """
    Return the user's configuration directory

    This is $XDG_CONFIG_HOME if it is set and non-empty, otherwise the expansion of ~/.config.
    An empty value is treated the same as an unset one, as required by the
    XDG Base Directory Specification.
    """
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
936
937
def _open_database_copy(database_path, tmpdir):
    """
    Copy the database to "temporary.sqlite" inside `tmpdir` and return a cursor for the copy

    The connection is not closed here; it remains reachable through the returned cursor.
    """
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
944
945
def _get_column_names(cursor, table_name):
    """
    Return the names of the columns of `table_name` as a list of str

    Works whether the connection yields text as bytes (text_factory = bytes) or as str.
    `table_name` is interpolated into the query as-is, so it must be a trusted identifier.
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # the second field of each table_info row is the column name
    column_names = (row[1] for row in table_info)
    return [name.decode() if isinstance(name, bytes) else name for name in column_names]
982ee69a
MB
949
950
def _find_most_recently_used_file(root, filename, logger):
    """
    Search the directory tree below `root` for files named `filename`

    Returns the path of the match with the newest modification time,
    or None if there is no match. Progress is reported once per file visited.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched, matches = 0, []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, names in os.walk(root):
            for name in names:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if name == filename:
                    matches.append(os.path.join(directory, name))

    if not matches:
        return None
    return max(matches, key=lambda match: os.lstat(match).st_mtime)
962
963
def _merge_cookie_jars(jars):
    """
    Combine the cookies of all `jars` into a new YoutubeDLCookieJar

    Cookies are added in iteration order. The merged jar takes the filename of
    the last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source in jars:
        for cookie in source:
            merged.set_cookie(cookie)
        if source.filename is None:
            continue
        merged.filename = source.filename
    return merged
972
973
def _is_path(value):
    """
    Return whether `value` looks like a path, i.e. contains a path separator

    Both os.path.sep and, where the platform defines one, os.path.altsep are recognised,
    so that e.g. "C:/profiles/Default" is treated as a path on Windows.
    """
    # os.path.altsep is None on platforms with a single separator
    return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
976
977
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification and return it as a tuple

    Returns (browser_name, profile, keyring, container), where a `profile` that
    looks like a path has had a leading "~" expanded.
    Raises ValueError if `browser_name` or `keyring` is not supported.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    # profiles may be given either by name or by path; only paths are expanded
    profile_is_path = profile is not None and _is_path(profile)
    resolved_profile = os.path.expanduser(profile) if profile_is_path else profile
    return browser_name, resolved_profile, keyring, container