]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[extractor/crunchyroll:beta] Extract timestamp and fix tests (#4535)
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
14f25df2 1import base64
f59f5ef8 2import contextlib
982ee69a 3import ctypes
54007a45 4import http.cookiejar
982ee69a
MB
5import json
6import os
7import shutil
982ee69a
MB
8import struct
9import subprocess
10import sys
11import tempfile
2e4585da 12import time
982ee69a 13from datetime import datetime, timedelta, timezone
f59f5ef8 14from enum import Enum, auto
982ee69a
MB
15from hashlib import pbkdf2_hmac
16
1d3586d0 17from .aes import (
18 aes_cbc_decrypt_bytes,
19 aes_gcm_decrypt_and_verify_bytes,
20 unpad_pkcs7,
21)
9b8ee23b 22from .dependencies import (
23 _SECRETSTORAGE_UNAVAILABLE_REASON,
24 secretstorage,
25 sqlite3,
26)
97ec5bc5 27from .minicurses import MultilinePrinter, QuietMultilinePrinter
f8271158 28from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
982ee69a 29
982ee69a
MB
# Browsers that are handled by the shared Chromium extraction path.
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name that extract_cookies_from_browser() knows how to handle.
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS.union({'firefox', 'safari'})
32
33
class YDLLogger:
    """Logging adapter that forwards messages to a YoutubeDL-like object.

    When no ``ydl`` object is supplied every logging method is a silent no-op,
    which lets the cookie code run without a downloader instance.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward ``message`` to ``ydl.write_debug``."""
        if self._ydl:
            self._ydl.write_debug(message)

    def info(self, message):
        """Print ``message`` with a ``[Cookies]`` prefix via ``ydl.to_screen``."""
        if self._ydl:
            self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward ``message`` and the ``only_once`` flag to ``ydl.report_warning``."""
        if self._ydl:
            self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward ``message`` to ``ydl.report_error``."""
        if self._ydl:
            self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        # _DELAY: minimum number of seconds between two screen updates
        # _timer: time of the last update (0 means "never printed")
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            """Print ``message`` on line 0 unless the previous update was too recent."""
            if time.time() - self._timer > self._DELAY:
                self.print_at_line(f'[Cookies] {message}', 0)
                self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        Returns None when no progress output should be shown.
        """
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
            return
        file = self._ydl._out_files.error
        try:
            if not file.isatty():
                return
        except Exception:
            # A stream that cannot answer isatty() gets no progress bar.
            # KeyboardInterrupt/SystemExit are deliberately not swallowed here.
            return
        return self.ProgressBar(file, preserve_output=False)
97ec5bc5 74
75
76def _create_progress_bar(logger):
77 if hasattr(logger, 'progress_bar'):
78 printer = logger.progress_bar()
79 if printer:
80 return printer
81 printer = QuietMultilinePrinter()
82 printer.print = lambda _: None
83 return printer
84
982ee69a
MB
85
def load_cookies(cookie_file, browser_specification, ydl):
    """Collect cookies from a browser and/or a cookie file into one merged jar.

    @param cookie_file            cookie file (path or other input accepted by
                                  YoutubeDLCookieJar), or None to skip it
    @param browser_specification  arguments for _parse_browser_specification,
                                  or None to skip browser extraction
    @param ydl                    object handed to YDLLogger for log output
    @returns                      the result of merging all collected jars
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        is_filename = YoutubeDLCookieJar.is_path(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # An unreadable path is not loaded, but its (empty) jar is still added
        should_load = (not is_filename) or os.access(cookie_file, os.R_OK)
        if should_load:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
103
104
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch to the cookie extractor that matches ``browser_name``.

    ``keyring`` is only forwarded to the Chromium-based extractor.
    Raises ValueError for a browser name that is not recognised.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
114
115
def _extract_firefox_cookies(profile, logger):
    """Read all cookies from the most recently used Firefox ``cookies.sqlite``.

    ``profile`` may be None (search the default Firefox directory), a path, or a
    name that is joined onto the default Firefox directory.
    Returns an empty jar when sqlite3 is unavailable and raises FileNotFoundError
    when no cookie database is found.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is not None and _is_path(profile):
        search_root = profile
    else:
        search_root = _firefox_browser_dir()
        if profile is not None:
            search_root = os.path.join(search_root, profile)

    database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    # The database is queried through a copy placed in a temporary directory
    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    jar.set_cookie(http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
157
158
159def _firefox_browser_dir():
dec30912 160 if sys.platform in ('cygwin', 'win32'):
19a03940 161 return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
982ee69a
MB
162 elif sys.platform == 'darwin':
163 return os.path.expanduser('~/Library/Application Support/Firefox')
dec30912 164 return os.path.expanduser('~/.mozilla/firefox')
982ee69a
MB
165
166
def _get_chromium_based_browser_settings(browser_name):
    """Return per-browser settings for a Chromium-based browser.

    The result is a dict with the keys ``browser_dir`` (platform-specific data
    directory), ``keyring_name`` (name used to look up the encryption password)
    and ``supports_profiles``. Raises KeyError for an unknown ``browser_name``.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    is_mac = sys.platform == 'darwin'

    if sys.platform in ('cygwin', 'win32'):
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        base, relative = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
        }[browser_name]
    elif is_mac:
        base = os.path.expanduser('~/Library/Application Support')
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]
    else:
        base = _config_home()
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if is_mac else 'Chromium',
        'opera': 'Opera' if is_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if is_mac else 'Chrome',
    }[browser_name]

    return {
        'browser_dir': os.path.join(base, relative),
        'keyring_name': keyring_name,
        # opera is the only listed browser without profile support
        'supports_profiles': browser_name != 'opera',
    }
221
222
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt all cookies of a Chromium-based browser.

    ``profile`` may be None (search the browser directory), a path, or a profile
    name relative to the browser directory. ``keyring`` is passed on to the
    cookie decryptor. Returns an empty jar when sqlite3 is unavailable and raises
    FileNotFoundError when no ``Cookies`` database is found.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)
    supports_profiles = config['supports_profiles']

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # the browser directory is the parent of an explicit profile path
        config['browser_dir'] = os.path.dirname(profile) if supports_profiles else profile
    elif supports_profiles:
        search_root = os.path.join(config['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = config['browser_dir']

    database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            # text columns are returned as bytes and decoded per cookie
            cursor.connection.text_factory = bytes
            # the "secure" column has two possible names
            if 'is_secure' in _get_column_names(cursor, 'cookies'):
                secure_column = 'is_secure'
            else:
                secure_column = 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed = 0
            unencrypted = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed += 1
                        continue
                    if not is_encrypted:
                        unencrypted += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed} could not be decrypted)' if failed > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = {**decryptor._cookie_counts, 'unencrypted': unencrypted}
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
287
288
97ec5bc5 289def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
0f06bcd7 290 host_key = host_key.decode()
291 name = name.decode()
292 value = value.decode()
293 path = path.decode()
97ec5bc5 294 is_encrypted = not value and encrypted_value
295
296 if is_encrypted:
297 value = decryptor.decrypt(encrypted_value)
298 if value is None:
299 return is_encrypted, None
300
ac668111 301 return is_encrypted, http.cookiejar.Cookie(
97ec5bc5 302 version=0, name=name, value=value, port=None, port_specified=False,
303 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
304 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
305 comment=None, comment_url=None, rest={})
306
307
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class for the platform-specific Chromium cookie decryptors.

    Overview of the encryption schemes:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # per-version counters; each subclass replaces this with its own dict in __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one encrypted cookie value; subclasses must override this."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 338
982ee69a 339
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor that matches the current platform.

    ``browser_root`` is only used on Windows; ``browser_keyring_name`` is used
    on macOS and Linux; ``keyring`` is only used on Linux.
    """
    platform = sys.platform
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
982ee69a
MB
346
347
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypt Chromium cookies on Linux (``v10`` and ``v11`` prefixed values)."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 cookies always use a key derived from this fixed password
        self._v10_key = self.derive_key(b'peanuts')
        keyring_password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        # without a keyring password v11 cookies cannot be decrypted
        self._v11_key = self.derive_key(keyring_password) if keyring_password is not None else None
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a cookie value based on its 3 byte version prefix.

        Returns None for an unknown prefix and for v11 values when no v11 key
        is available.
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        self._cookie_counts['other'] += 1
        return None
380
381
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypt Chromium cookies on macOS (``v10`` prefixed values or plaintext)."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        keychain_password = _get_mac_keyring_password(browser_keyring_name, logger)
        # without a keychain password v10 cookies cannot be decrypted
        self._v10_key = self.derive_key(keychain_password) if keychain_password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a cookie value based on its 3 byte version prefix.

        Returns None for v10 values when no key is available; values without
        the v10 prefix are returned unchanged.
        """
        if encrypted_value[:3] != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(encrypted_value[3:], self._v10_key, self._logger)
412
413
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypt Chromium cookies on Windows (``v10`` AES-GCM values or raw DPAPI values)."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        # may be None when the key could not be obtained
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Decrypt a cookie value based on its 3 byte version prefix.

        Returns None when a v10 value is seen without a key, or when the DPAPI
        helper yields no plaintext.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # layout after the version prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        self._cookie_counts['other'] += 1
        # any other prefix means the data is DPAPI encrypted
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
        plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
        # NOTE(review): the DPAPI helper is defined outside this view and appears to
        # signal failure by returning None; guard so that case counts as a failed
        # cookie instead of raising AttributeError on .decode() - confirm
        return None if plaintext is None else plaintext.decode()
982ee69a
MB
449
450
451def _extract_safari_cookies(profile, logger):
452 if profile is not None:
453 logger.error('safari does not support profiles')
454 if sys.platform != 'darwin':
86e5f3ed 455 raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a
MB
456
457 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
458
459 if not os.path.isfile(cookies_path):
1f7db853
MP
460 logger.debug('Trying secondary cookie location')
461 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
462 if not os.path.isfile(cookies_path):
463 raise FileNotFoundError('could not find safari cookies database')
982ee69a
MB
464
465 with open(cookies_path, 'rb') as f:
466 cookies_data = f.read()
467
468 jar = parse_safari_cookies(cookies_data, logger=logger)
86e5f3ed 469 logger.info(f'Extracted {len(jar)} cookies from safari')
982ee69a
MB
470 return jar
471
472
class ParserError(Exception):
    """Raised by DataParser when the binary input cannot be read as requested."""
475
476
class DataParser:
    """Sequential reader over a bytes buffer with a moving ``cursor``.

    All read/skip methods advance ``cursor`` and raise ParserError when the
    request cannot be satisfied.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and raise ParserError if they differ."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless ``big_endian``)."""
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little endian unless ``big_endian``)."""
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        """Read a NUL-terminated string and return it decoded with the default codec.

        The cursor ends up just past the terminator. Raises ParserError when no
        terminator exists before the end of the input, and UnicodeDecodeError
        when the bytes cannot be decoded.
        """
        # one scan for the terminator instead of one read_bytes() call per byte
        terminator = self._data.find(b'\x00', self.cursor)
        if terminator < 0:
            # same end state as reading byte by byte: all input is consumed
            self.cursor = len(self._data)
            raise ParserError('reached end of input')
        raw = self._data[self.cursor:terminator]
        self.cursor = terminator + 1
        return raw.decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance by ``num_bytes``, logging the skipped bytes at debug level."""
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        """Advance the cursor to the absolute position ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Advance the cursor to the end of the input."""
        self.skip_to(len(self._data), description)
526
527
528def _mac_absolute_time_to_posix(timestamp):
529 return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
530
531
def _parse_safari_cookies_header(data, logger):
    """Parse the file header of a Safari cookie database.

    @returns  (page_sizes, body_start): the big-endian page sizes listed in the
              header and the offset at which the header ends
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
538
539
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a Safari cookie database and add its cookies to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    # little-endian offsets of each record, relative to the start of the page
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            # keep the page cursor in step with the record that was just parsed
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
558
559
def _parse_safari_cookies_record(data, jar, logger):
    """Parse one cookie record starting at ``data[0]`` and add it to ``jar``.

    A record whose strings cannot be decoded is skipped with a warning.

    @returns  the record size stored in the record's first field
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    parser.skip(4, 'unknown record field 2')
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # the creation date is read to advance the cursor but is otherwise unused
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    strings = []
    try:
        for offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=bool(flags & 0x0001), expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
600
601
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari cookie database into ``jar``
    (a new YoutubeDLCookieJar when ``jar`` is None) and return it.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
617
618
f59f5ef8
MB
619class _LinuxDesktopEnvironment(Enum):
620 """
621 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
622 DesktopEnvironment
623 """
624 OTHER = auto()
625 CINNAMON = auto()
626 GNOME = auto()
627 KDE = auto()
628 PANTHEON = auto()
629 UNITY = auto()
630 XFCE = auto()
982ee69a
MB
631
632
f59f5ef8
MB
633class _LinuxKeyring(Enum):
634 """
635 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
636 SelectedLinuxBackend
637 """
638 KWALLET = auto()
639 GNOMEKEYRING = auto()
640 BASICTEXT = auto()
641
642
# Names of all _LinuxKeyring members (a live keys view of the enum's member mapping)
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
644
645
def _get_linux_desktop_environment(env):
    """
    Detect the desktop environment from the environment mapping ``env``.

    XDG_CURRENT_DESKTOP is consulted first; DESKTOP_SESSION and the legacy
    session variables are only consulted when it is not set.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        # only the first entry of a colon separated list is considered
        xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
        if xdg_current_desktop == 'Unity':
            is_gnome_fallback = desktop_session is not None and 'gnome-fallback' in desktop_session
            return _LinuxDesktopEnvironment.GNOME if is_gnome_fallback else _LinuxDesktopEnvironment.UNITY
        return {
            'GNOME': _LinuxDesktopEnvironment.GNOME,
            'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
            'KDE': _LinuxDesktopEnvironment.KDE,
            'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
            'XFCE': _LinuxDesktopEnvironment.XFCE,
        }.get(xdg_current_desktop, _LinuxDesktopEnvironment.OTHER)

    if desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE
        return _LinuxDesktopEnvironment.OTHER

    # neither variable is set: fall back to the legacy session markers
    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    if 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
684
685
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the desktop environment detected in os.environ.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    # every other recognised desktop uses the GNOME keyring
    return _LinuxKeyring.GNOMEKEYRING
700
701
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Asks kwalletd5 over dbus and falls back to 'kdewallet' when the call fails.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet'
    ]
    try:
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet
        logger.debug(f'NetworkWallet = "{stdout.strip()}"')
        return stdout.strip()
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
729
730
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from KWallet via ``kwallet-query``.

    @returns  the password as bytes, or b'' when kwallet-query is missing, fails,
              reports that the entry could not be read, or raises
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # the middle fragment ends with a space so the message reads "should be included"
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this may be a bug as the intended behaviour is to generate a random password and store
            # it, but that doesn't matter here.
            return b''

        logger.debug('password found')
        return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
771
772
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Return the secret of the "<browser> Safe Storage" item in the default keyring.

    Returns b'' when secretstorage is unavailable or no such item exists.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    wanted_label = f'{browser_keyring_name} Safe Storage'
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        # the loop finished without finding a matching label
        logger.error('failed to read from keyring')
        return b''
789
790
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Fetch the cookie encryption password from the selected Linux keyring.

    ``keyring`` is the name of a _LinuxKeyring member; when it is falsy the
    backend is chosen automatically. Returns None for the BASICTEXT backend.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {keyring}'
809
810
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the "<browser_keyring_name> Safe Storage" password from the macOS keychain
    by running `security find-generic-password`.

    @returns    the password as bytes (trailing newlines stripped), or None when the
                command could not be run or exited with a non-zero status
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        stdout, _, returncode = Popen.run(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if returncode:
            # A failed lookup leaves stdout empty; report "no password" (None) instead of
            # handing back b'' as though it were the stored password.
            logger.warning('find-generic-password failed')
            return None
        return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
824
825
def _get_windows_v10_key(browser_root, logger):
    """
    Load the encrypted key stored in the browser's "Local State" file and decrypt it with DPAPI.

    The most recently used "Local State" file below browser_root is parsed as JSON and
    data['os_crypt']['encrypted_key'] is base64-decoded. The decoded value must begin with
    b'DPAPI'; the remainder is passed to _decrypt_windows_dpapi.

    @returns    the result of _decrypt_windows_dpapi, or None (after logging an error) when the
                file is not found, the key entry is missing or the prefix is wrong
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')
    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    wrapped_key = base64.b64decode(encoded_key)
    dpapi_marker = b'DPAPI'
    if wrapped_key.startswith(dpapi_marker):
        # strip the marker; only the remainder is the DPAPI blob
        return _decrypt_windows_dpapi(wrapped_key[len(dpapi_marker):], logger)
    logger.error('invalid key')
    return None
845
846
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of key_length bytes from password and salt using PBKDF2 with HMAC-SHA1"""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
849
850
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt ciphertext with AES-CBC, strip the PKCS#7 padding and decode the result as text.

    @returns    the decoded string, or None (after a one-time warning) if the
                decrypted bytes cannot be decoded
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    raw = unpad_pkcs7(padded)
    try:
        text = raw.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
858
859
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and verify ciphertext with AES-GCM, then decode the result as text.

    @returns    the decoded string, or None (after a one-time warning) if verification
                raises ValueError or the decrypted bytes cannot be decoded
    """
    # The two steps are guarded separately: UnicodeDecodeError is itself a ValueError,
    # so a single handler could not tell the two failures apart.
    try:
        raw = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        text = raw.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
872
873
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt ciphertext by calling CryptUnprotectData from crypt32 through ctypes.

    @returns    the decrypted bytes, or None (after a one-time warning) if the call
                reports failure

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob),  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before releasing the output buffer
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
904
905
def _config_home():
    """Return $XDG_CONFIG_HOME if it is set in the environment, otherwise the expanded ~/.config"""
    fallback = os.path.expanduser('~/.config')
    return os.environ.get('XDG_CONFIG_HOME', fallback)
908
909
def _open_database_copy(database_path, tmpdir):
    """
    Copy the database at database_path to "temporary.sqlite" inside tmpdir
    and return a cursor on a connection to that copy.
    """
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    snapshot_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, snapshot_path)
    return sqlite3.connect(snapshot_path).cursor()
916
917
def _get_column_names(cursor, table_name):
    """
    Return the column names of table_name as a list of str, in table order.

    Works whether the cursor's connection yields text as bytes (text_factory = bytes)
    or as str (the sqlite3 default). An unknown table yields an empty list.

    @param table_name   interpolated into the PRAGMA statement unescaped;
                        pass only trusted, hard-coded names
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # the second field of each table_info row is the column name
    names = (row[1] for row in table_info)
    return [name.decode() if isinstance(name, bytes) else name for name in names]
982ee69a
MB
921
922
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk the tree below root and return the path of the file named filename
    with the greatest modification time, or None if no such file exists.

    Search progress is reported through the progress bar created from logger.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched, matches = 0, []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, names in os.walk(root):
            for name in names:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if name == filename:
                    matches.append(os.path.join(directory, name))
    if not matches:
        return None
    return max(matches, key=lambda candidate: os.lstat(candidate).st_mtime)
934
935
def _merge_cookie_jars(jars):
    """
    Combine the cookies of all given jars into a new YoutubeDLCookieJar.

    The merged jar takes the filename of the last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        if source_jar.filename is None:
            continue
        merged.filename = source_jar.filename
    return merged
944
945
def _is_path(value):
    """Return True if value contains the platform's path separator"""
    separator = os.path.sep
    return separator in value
948
949
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """
    Validate a browser specification and return it as (browser_name, profile, keyring).

    A profile that looks like a path (see _is_path) has its leading "~" expanded.

    @raises ValueError  if browser_name is not in SUPPORTED_BROWSERS, or keyring is
                        neither None nor one of SUPPORTED_KEYRINGS
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    looks_like_path = profile is not None and _is_path(profile)
    resolved_profile = os.path.expanduser(profile) if looks_like_path else profile
    return browser_name, resolved_profile, keyring