]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
Support environment variables in `--ffmpeg-location`
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
14f25df2 1import base64
f59f5ef8 2import contextlib
54007a45 3import http.cookiejar
982ee69a
MB
4import json
5import os
9bd13fe5 6import re
982ee69a 7import shutil
982ee69a
MB
8import struct
9import subprocess
10import sys
11import tempfile
2e4585da 12import time
982ee69a 13from datetime import datetime, timedelta, timezone
f59f5ef8 14from enum import Enum, auto
982ee69a
MB
15from hashlib import pbkdf2_hmac
16
1d3586d0 17from .aes import (
18 aes_cbc_decrypt_bytes,
19 aes_gcm_decrypt_and_verify_bytes,
20 unpad_pkcs7,
21)
9b8ee23b 22from .dependencies import (
23 _SECRETSTORAGE_UNAVAILABLE_REASON,
24 secretstorage,
25 sqlite3,
26)
97ec5bc5 27from .minicurses import MultilinePrinter, QuietMultilinePrinter
d2c8aadf 28from .utils import (
29 Popen,
30 YoutubeDLCookieJar,
31 error_to_str,
32 expand_path,
22df97f9 33 is_path_like,
d2c8aadf 34 try_call,
35)
982ee69a 36
982ee69a
MB
# Browsers that extract_cookies_from_browser() routes to the chromium extractor.
CHROMIUM_BASED_BROWSERS = {
    'brave',
    'chrome',
    'chromium',
    'edge',
    'opera',
    'vivaldi',
}
# Every browser name this module knows how to read cookies from.
SUPPORTED_BROWSERS = {*CHROMIUM_BASED_BROWSERS, 'firefox', 'safari'}
39
40
class YDLLogger:
    """Logging adapter on top of an optional ``ydl`` object.

    Each logging method forwards to the matching ``ydl`` method and silently
    does nothing when no ``ydl`` was supplied.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        if not self._ydl:
            return
        self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        # Minimum number of seconds between two printed updates, and the time of the last one
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            """Print ``message`` on line 0 unless an update was printed less than _DELAY seconds ago"""
            if time.time() - self._timer <= self._DELAY:
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        # Do not print to files/pipes, loggers, or when --no-progress is used
        ydl = self._ydl
        if not ydl:
            return None
        if ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None
        stream = ydl._out_files.error
        try:
            is_terminal = stream.isatty()
        except BaseException:
            return None
        if not is_terminal:
            return None
        return self.ProgressBar(stream, preserve_output=False)
97ec5bc5 81
82
def _create_progress_bar(logger):
    """Return the logger's progress bar, or a silent stand-in when it offers none.

    The stand-in is a QuietMultilinePrinter whose ``print`` discards its argument.
    """
    printer = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not printer:
        printer = QuietMultilinePrinter()
        printer.print = lambda _: None
    return printer
91
982ee69a
MB
92
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser specification and/or a cookie file.

    @param cookie_file            None, or the file (path-like or other object) handed
                                  to YoutubeDLCookieJar
    @param browser_specification  None, or the arguments for _parse_browser_specification
    @param ydl                    object used for logging during browser extraction
    @returns                      the result of merging all loaded jars
    """
    jars = []
    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
        jars.append(browser_jar)

    if cookie_file is None:
        return _merge_cookie_jars(jars)

    given_as_path = is_path_like(cookie_file)
    if given_as_path:
        cookie_file = expand_path(cookie_file)

    file_jar = YoutubeDLCookieJar(cookie_file)
    # An unreadable path still yields an (empty) jar bound to that path
    if not given_as_path or os.access(cookie_file, os.R_OK):
        file_jar.load(ignore_discard=True, ignore_expires=True)
    jars.append(file_jar)

    return _merge_cookie_jars(jars)
111
112
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch to the cookie extractor matching ``browser_name``.

    ``container`` is only passed to the firefox extractor and ``keyring`` only to
    the chromium one. Raises ValueError for an unrecognised browser name.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
122
123
def _extract_firefox_cookies(profile, container, logger):
    """Read the cookies of a firefox profile into a YoutubeDLCookieJar.

    @param profile    None (search the default firefox directory), a path, or a name
                      joined onto the default firefox directory
    @param container  None (all cookies), 'none' (only cookies outside any container),
                      or a container identifier looked up in containers.json
    @param logger     object providing info/debug/warning
    @returns          the filled jar; an empty jar when sqlite3 is unavailable

    Raises FileNotFoundError when the cookie database or containers.json cannot be
    found/read, and ValueError when the requested container is not listed.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        # JSON is UTF-8; relying on the locale's default encoding breaks on
        # non-ASCII container names when that default is not UTF-8
        with open(containers_path, encoding='utf8') as containers:
            identities = json.load(containers).get('identities', [])

        def container_names(context):
            # A context can be addressed by its 'name', by its complete l10nID
            # ("userContext<X>.label", as before) or by the captured "<X>" alone
            label = try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']))
            return (context.get('name'), *(label.group(0, 1) if label else ()))

        container_id = next((
            context.get('userContextId') for context in identities
            if container in container_names(context)), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            query = 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies'
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                # userContextId is either the last attribute or followed by '&'
                cursor.execute(
                    f'{query} WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # Bound parameter instead of a double-quoted literal: SQLite only accepts
                # "..." as a string for legacy reasons and may be built with that disabled
                cursor.execute(f'{query} WHERE NOT INSTR(originAttributes, ?)', ('userContextId=',))
            else:
                cursor.execute(query)
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            # cursor is still None when opening the database copy failed
            if cursor is not None:
                cursor.connection.close()
190
191
def _firefox_browser_dir():
    """Return the platform-specific directory that is searched for firefox profiles."""
    platform = sys.platform
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    if platform in ('cygwin', 'win32'):
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    # every other platform
    return os.path.expanduser('~/.mozilla/firefox')
982ee69a
MB
198
199
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a chromium-based browser.

    @returns  dict with the keys 'browser_dir', 'keyring_name' and 'supports_profiles'

    Raises KeyError when ``browser_name`` is not one of the known browsers.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    on_mac = sys.platform == 'darwin'

    if sys.platform in ('cygwin', 'win32'):
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        base, relative = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
        }[browser_name]
    elif on_mac:
        base = os.path.expanduser('~/Library/Application Support')
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]
    else:
        base = _config_home()
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]
    browser_dir = os.path.join(base, relative)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    if on_mac:
        keyring_names = {'edge': 'Microsoft Edge', 'opera': 'Opera', 'vivaldi': 'Vivaldi'}
    else:
        keyring_names = {'edge': 'Chromium', 'opera': 'Chromium', 'vivaldi': 'Chrome'}
    keyring_names.update(brave='Brave', chrome='Chrome', chromium='Chromium')

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_names[browser_name],
        'supports_profiles': browser_name not in {'opera'},
    }
254
255
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt the cookies of a chromium-based browser into a YoutubeDLCookieJar.

    @param profile  None (search the browser's data directory), a path, or a profile
                    name joined onto the data directory
    @param keyring  passed through to get_cookie_decryptor
    @returns        the filled jar; an empty jar when sqlite3 is unavailable

    Raises FileNotFoundError when no 'Cookies' database is found.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    profiles_supported = settings['supports_profiles']
    browser_root = settings['browser_dir']

    if profile is None:
        search_root = browser_root
    elif _is_path(profile):
        search_root = profile
        # With profile support, the browser root is the parent of the given profile directory
        browser_root = os.path.dirname(profile) if profiles_supported else profile
    elif profiles_supported:
        search_root = os.path.join(browser_root, profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = browser_root

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(browser_root, settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = _open_database_copy(cookie_database_path, tmpdir)
        try:
            # Text columns are fetched as bytes and decoded per cookie
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total_cookie_count = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            cursor.connection.close()
320
321
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the chromium cookies table into a cookie.

    The text columns arrive as bytes and are decoded here. ``decryptor`` is only
    consulted when the plain value is empty and an encrypted value is present.

    @returns  (is_encrypted, cookie); cookie is None when decryption failed
    """
    host_key, name, value, path = (column.decode() for column in (host_key, name, value, path))
    is_encrypted = not value and encrypted_value

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    cookie = http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
339
340
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class of the platform-specific chromium cookie decryptors.

    Overview of the encryption schemes:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
        - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
            - KeyStorageLinux::CreateService
    """

    # Number of cookies seen per version prefix
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Return the decrypted value of ``encrypted_value``; provided by each subclass."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 371
982ee69a 372
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the chromium cookie decryptor for the current platform.

    ``browser_root`` is only used on Windows and ``keyring`` only on the
    fallback (non-mac, non-windows) branch.
    """
    platform = sys.platform
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
982ee69a
MB
379
380
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the v10/v11 AES-CBC cookies written by chromium on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 cookies use a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        # v11 cookies use a key derived from the keyring password, if one was obtained
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = None if password is None else self.derive_key(password)
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value, or None when it cannot be decrypted."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version not in (b'v10', b'v11'):
            self._cookie_counts['other'] += 1
            return None

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        self._cookie_counts['v11'] += 1
        if self._v11_key is None:
            self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
413
414
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the cookies written by chromium on macOS."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        # Without a keyring password there is no key and v10 cookies cannot be decrypted
        self._v10_key = None if password is None else self.derive_key(password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted value, the input itself for non-v10 data, or None without a key."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
445
446
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the cookies written by chromium on Windows."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value, or None when it cannot be decrypted.

        v10 values are laid out as prefix | nonce | ciphertext | authentication tag
        and decrypted with AES-GCM; any other value is handed to DPAPI as a whole.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): _decrypt_windows_dpapi is defined outside this section and
            # presumably returns None on failure - confirm. Guarding here reports such a
            # cookie as undecryptable instead of raising AttributeError on None.decode()
            return None if plaintext is None else plaintext.decode()
982ee69a
MB
482
483
def _extract_safari_cookies(profile, logger):
    """Read safari's Cookies.binarycookies file into a cookie jar.

    A given ``profile`` is reported as an error but otherwise ignored.
    Raises ValueError on any platform other than darwin and FileNotFoundError
    when neither known cookie location exists.
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    primary = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    if os.path.isfile(primary):
        cookies_path = primary
    else:
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser(
            '~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookies_file:
        raw = cookies_file.read()

    jar = parse_safari_cookies(raw, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
504
505
class ParserError(Exception):
    """Raised by DataParser on invalid reads/skips, unexpected values or truncated input."""
508
509
class DataParser:
    """Sequential reader over a block of bytes.

    ``cursor`` is the offset of the next unread byte. Every read advances it;
    reading past the end of the data raises ParserError.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume len(expected_value) bytes and raise ParserError unless they equal it."""
        actual = self.read_bytes(len(expected_value))
        if actual != expected_value:
            raise ParserError(f'unexpected value: {actual} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless ``big_endian``)."""
        (number,) = struct.unpack('>I' if big_endian else '<I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little endian unless ``big_endian``)."""
        (number,) = struct.unpack('>d' if big_endian else '<d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read bytes up to and including the next NUL and return them decoded, without the NUL."""
        single_bytes = iter(lambda: self.read_bytes(1), b'\x00')
        return b''.join(single_bytes).decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes, logging them; a negative count raises ParserError."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor ends up at ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that has not been read yet."""
        self.skip_to(len(self._data), description)
559
560
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
563
564
def _parse_safari_cookies_header(data, logger):
    """Parse the file header of a safari cookies database.

    @returns  (list with the size of every page, offset at which the header ends)
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
571
572
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a safari cookies database and add its records to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = []
    for _ in range(number_of_cookies):
        record_offsets.append(parser.read_uint())
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record is parsed from its own slice; afterwards step over it here
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
591
592
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of ``data`` and add it to ``jar``.

    A record whose strings cannot be decoded is skipped with a warning.

    @returns  the size of the record as stored in its first field
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    # Offsets of the NUL terminated strings, in the order they are stored
    string_offsets = [parser.read_uint() for _ in range(4)]  # domain, name, path, value
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # The creation date is read to advance the cursor, but is not used
    _mac_absolute_time_to_posix(parser.read_double())

    strings = []
    try:
        for offset in string_offsets:
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
633
634
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a safari cookies database into ``jar`` (a new jar by default).

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    target = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, target, logger)
    body.skip_to_end('footer')
    return target
650
651
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments that are told apart when choosing a keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """

    OTHER = auto()  # fallback when none of the environments below is detected
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
982ee69a
MB
664
665
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends a password can be requested from.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """

    KWALLET = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()  # no keyring password is looked up for this backend


# Names of the keyrings above
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
677
678
def _get_linux_desktop_environment(env):
    """
    Detect the desktop environment from a mapping of environment variables.

    XDG_CURRENT_DESKTOP (its first ':' separated entry) is checked first, then
    DESKTOP_SESSION; the GNOME/KDE session variables are only looked at when
    DESKTOP_SESSION is not set at all.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    session = env.get('DESKTOP_SESSION', None)
    if current_desktop is not None:
        current_desktop = current_desktop.partition(':')[0].strip()

    if current_desktop == 'Unity':
        if session is not None and 'gnome-fallback' in session:
            return _LinuxDesktopEnvironment.GNOME
        return _LinuxDesktopEnvironment.UNITY

    by_current_desktop = {
        'GNOME': _LinuxDesktopEnvironment.GNOME,
        'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
        'KDE': _LinuxDesktopEnvironment.KDE,
        'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
        'XFCE': _LinuxDesktopEnvironment.XFCE,
    }
    if current_desktop in by_current_desktop:
        return by_current_desktop[current_desktop]

    if session is None:
        if 'GNOME_DESKTOP_SESSION_ID' in env:
            return _LinuxDesktopEnvironment.GNOME
        if 'KDE_FULL_SESSION' in env:
            return _LinuxDesktopEnvironment.KDE
    elif session in ('mate', 'gnome'):
        return _LinuxDesktopEnvironment.GNOME
    elif 'kde' in session:
        return _LinuxDesktopEnvironment.KDE
    elif 'xfce' in session:
        return _LinuxDesktopEnvironment.XFCE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
717
718
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend based on the detected desktop environment.

    KDE selects KWALLET, an unrecognised environment BASICTEXT and every
    other environment GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
733
734
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Falls back to 'kdewallet' when the dbus query fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet',
    ]
    try:
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if not returncode:
            wallet = stdout.strip()
            logger.debug(f'NetworkWallet = "{wallet}"')
            return wallet
        logger.warning('failed to read NetworkWallet')
        return default_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
762
763
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from KWallet using kwallet-query.

    @returns  the password as bytes; b'' when kwallet-query is missing, fails,
              raises, or reports that it failed to read the entry
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # Every fragment but the last ends in a space so the joined message reads correctly
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this may be a bug as the intended behaviour is to generate a random password and store
                # it, but that doesn't matter here.
                return b''
            else:
                logger.debug('password found')
                return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
804
805
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Read the "<browser_keyring_name> Safe Storage" secret from the default secretstorage collection.

    @returns  the secret of the first item whose label matches, or b'' if secretstorage
              is unavailable or no item matches (an error is logged in both cases)
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        matching_item = next(
            (entry for entry in collection.get_all_items() if entry.get_label() == wanted_label),
            None)
        if matching_item is not None:
            return matching_item.get_secret()
        logger.error('failed to read from keyring')
        return b''
822
823
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Obtain the browser's keyring password from the selected Linux keyring backend.

    @param browser_keyring_name  name passed on to the backend-specific reader
    @param keyring               name of a `_LinuxKeyring` member, or a falsy value to let
                                 `_choose_linux_keyring` pick one
    @param logger                object providing debug/warning/error methods
    @returns                     whatever the KWallet/Gnome keyring reader returns, or None
                                 for BASICTEXT
    @raises AssertionError       if the selected keyring is none of the handled members
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    # raised explicitly instead of `assert False`: assert statements are removed under
    # `python -O`, which would make an unhandled keyring fall through and return None
    raise AssertionError(f'Unknown keyring {keyring}')
842
843
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the "<browser_keyring_name> Safe Storage" password from the keychain using `security`.

    @param browser_keyring_name  used as the keychain account and to build the service name
    @param logger                object providing debug/warning/error methods
    @returns                     the password as bytes with trailing newlines removed, or None
                                 if `security` exits with a non-zero code or cannot be run
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        stdout, _, returncode = Popen.run(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if returncode:
            # without this check a failed lookup would be returned as an empty password (b'')
            logger.warning('find-generic-password failed')
            return None
        return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
857
858
def _get_windows_v10_key(browser_root, logger):
    """
    Locate the "Local State" file under `browser_root` and decrypt the key stored in it.

    The file is parsed as JSON, `os_crypt.encrypted_key` is base64-decoded, the leading
    b'DPAPI' marker is removed and the remainder is handed to `_decrypt_windows_dpapi`.

    @returns  the result of `_decrypt_windows_dpapi`, or None (after logging an error) if the
              file is not found, the key entry is missing or the marker is absent
    """
    path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{path}"')

    with open(path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    key_blob = base64.b64decode(encoded_key)
    dpapi_marker = b'DPAPI'
    if key_blob[:len(dpapi_marker)] != dpapi_marker:
        logger.error('invalid key')
        return None
    return _decrypt_windows_dpapi(key_blob[len(dpapi_marker):], logger)
878
879
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of `key_length` bytes from `password` and `salt` using PBKDF2 with HMAC-SHA1."""
    return pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
882
883
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt `ciphertext` with AES-CBC, strip the PKCS#7 padding and decode the result as text.

    @param initialization_vector  defaults to 16 space bytes
    @returns                      the decoded string, or None (after a one-time warning) if
                                  the decrypted bytes cannot be decoded
    """
    decrypted = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    unpadded = unpad_pkcs7(decrypted)
    try:
        text = unpadded.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
891
892
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and verify `ciphertext` with AES-GCM, then decode the result as text.

    @returns  the decoded string, or None (after a one-time warning) if
              `aes_gcm_decrypt_and_verify_bytes` raises ValueError or the
              decrypted bytes cannot be decoded
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        text = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
905
906
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling CryptUnprotectData from crypt32 through ctypes.

    @returns  the decrypted bytes, or None (after a one-time warning) if the call
              reports failure

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        # length/pointer pair used for both the input and the output of CryptUnprotectData
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before releasing the output buffer
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
939
940
def _config_home():
    """
    Return the user's configuration directory.

    Uses $XDG_CONFIG_HOME when it is set to a non-empty value, otherwise the expanded
    `~/.config` (an empty variable is treated the same as an unset one).
    """
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
943
944
def _open_database_copy(database_path, tmpdir):
    """Copy the database to `tmpdir`/temporary.sqlite and return a cursor on a connection to the copy."""
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    destination = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, destination)
    return sqlite3.connect(destination).cursor()
951
952
def _get_column_names(cursor, table_name):
    """Return the column names of `table_name`, decoded to str, in `PRAGMA table_info` order."""
    # the name is the second field of every row; `.decode()` requires it to be bytes
    # NOTE(review): presumably callers set `text_factory = bytes` on the connection - confirm
    rows = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    return [column_name.decode() for _cid, column_name, *_ in rows]
982ee69a
MB
956
957
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk `root` and return the path of the file named `filename` with the newest mtime.

    Progress is reported through the progress bar created from `logger`.

    @returns  the matching path with the largest `os.lstat(...).st_mtime`, or None if
              no file with that name is found
    """
    # if there are multiple browser profiles, take the most recently used one
    candidates = []
    i = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _subdirs, names in os.walk(root):
            for name in names:
                i += 1
                progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
                if name != filename:
                    continue
                candidates.append(os.path.join(directory, name))

    if not candidates:
        return None

    def _modified_at(candidate):
        return os.lstat(candidate).st_mtime

    return max(candidates, key=_modified_at)
969
970
def _merge_cookie_jars(jars):
    """
    Combine several cookie jars into one new YoutubeDLCookieJar.

    Cookies are added in iteration order via `set_cookie`; the result's filename is
    taken from the last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source in jars:
        for cookie in source:
            merged.set_cookie(cookie)
        if source.filename is None:
            continue
        merged.filename = source.filename
    return merged
979
980
def _is_path(value):
    """Return True if `value` contains the platform's path separator (`os.path.sep`)."""
    separator = os.path.sep
    return separator in value
983
984
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification and return it as a tuple.

    @returns           (browser_name, profile, keyring, container), where a profile that
                       is a path (see `_is_path`) has `~` expanded
    @raises ValueError if `browser_name` is not in SUPPORTED_BROWSERS, or `keyring` is
                       neither None nor one of SUPPORTED_KEYRINGS
    """
    browser_supported = browser_name in SUPPORTED_BROWSERS
    if not browser_supported:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    accepted_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in accepted_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    profile_is_path = profile is not None and _is_path(profile)
    resolved_profile = os.path.expanduser(profile) if profile_is_path else profile
    return browser_name, resolved_profile, keyring, container