]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[cleanup] Minor fixes (See desc)
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
10from datetime import datetime, timedelta, timezone
f59f5ef8 11from enum import Enum, auto
982ee69a
MB
12from hashlib import pbkdf2_hmac
13
1d3586d0 14from .aes import (
15 aes_cbc_decrypt_bytes,
16 aes_gcm_decrypt_and_verify_bytes,
17 unpad_pkcs7,
18)
f8271158 19from .compat import compat_b64decode, compat_cookiejar_Cookie
9b8ee23b 20from .dependencies import (
21 _SECRETSTORAGE_UNAVAILABLE_REASON,
22 secretstorage,
23 sqlite3,
24)
97ec5bc5 25from .minicurses import MultilinePrinter, QuietMultilinePrinter
f8271158 26from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
982ee69a 27
982ee69a
MB
# Browser names that are routed through the Chromium cookie extraction path.
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name accepted by the cookie extraction code in this module.
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS.union({'firefox', 'safari'})
30
31
class YDLLogger:
    """Logging facade that forwards messages to a YoutubeDL-like object.

    When constructed without ``ydl`` every logging method does nothing and
    ``progress_bar`` returns ``None``.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward ``message`` to ``ydl.write_debug``."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Print ``message`` on screen with a ``[Cookies]`` prefix."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward ``message`` and ``only_once`` to ``ydl.report_warning``."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward ``message`` to ``ydl.report_error``."""
        if not self._ydl:
            return
        self._ydl.report_error(message)

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        ydl = self._ydl
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not ydl:
            return
        if ydl.params.get('noprogress') or ydl.params.get('logger'):
            return
        stream = ydl._out_files['error']
        try:
            is_terminal = stream.isatty()
        except BaseException:
            # Any failure to query the stream is treated as "not a terminal"
            return
        if not is_terminal:
            return

        printer = MultilinePrinter(stream, preserve_output=False)

        def print_message(message):
            return printer.print_at_line(f'[Cookies] {message}', 0)

        printer.print = print_message
        return printer
67
68
def _create_progress_bar(logger):
    """Return the logger's progress printer, or a silent stand-in.

    The logger's ``progress_bar()`` is optional; when it is missing or returns
    a falsy value, a QuietMultilinePrinter whose ``print`` discards its
    argument is returned instead.
    """
    candidate = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if candidate:
        return candidate

    silent = QuietMultilinePrinter()
    silent.print = lambda _: None
    return silent
77
982ee69a
MB
78
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file            path of a cookie file, or None to skip it
    @param browser_specification  arguments for _parse_browser_specification,
                                  or None to skip browser extraction
    @param ydl                    object the browser extraction logs through
    @returns                      the result of merging all collected jars
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        path = expand_path(cookie_file)
        file_jar = YoutubeDLCookieJar(path)
        # An unreadable file still contributes an (empty) jar bound to that path
        if os.access(path, os.R_OK):
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
93
94
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch cookie extraction to the handler for ``browser_name``.

    ``keyring`` is only passed on to the Chromium-based handler.
    Raises ValueError when the browser name is not recognised.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
104
105
def _extract_firefox_cookies(profile, logger):
    """Read all cookies from a Firefox ``cookies.sqlite`` database.

    ``profile`` may be None (search the default Firefox directory), a path,
    or a profile name relative to the default Firefox directory.
    Returns an empty jar when sqlite3 is unavailable; raises FileNotFoundError
    when no cookie database is found.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is not None and _is_path(profile):
        search_root = profile
    else:
        browser_dir = _firefox_browser_dir()
        search_root = browser_dir if profile is None else os.path.join(browser_dir, profile)

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # The query runs against a copy of the database placed in tmpdir
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    jar.set_cookie(compat_cookiejar_Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
147
148
def _firefox_browser_dir():
    """Return the platform-specific directory searched for Firefox profiles.

    Raises ValueError on platforms other than Linux, Windows and macOS.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return os.path.expanduser('~/.mozilla/firefox')
    if platform == 'win32':
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a
MB
158
159
def _get_chromium_based_browser_settings(browser_name):
    """Return the per-platform settings for a Chromium-based browser.

    The result is a dict with the keys 'browser_dir', 'keyring_name' and
    'supports_profiles'. Raises ValueError on an unsupported platform and
    KeyError for a browser name missing from the tables below.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    on_mac = platform == 'darwin'

    if platform in ('linux', 'linux2'):
        config = _config_home()
        relative_dirs = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }
        browser_dir = os.path.join(config, relative_dirs[browser_name])

    elif platform == 'win32':
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        # (base directory, relative directory); opera is the only one under roaming
        locations = {
            'brave': (appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (appdata_local, R'Google\Chrome\User Data'),
            'chromium': (appdata_local, R'Chromium\User Data'),
            'edge': (appdata_local, R'Microsoft\Edge\User Data'),
            'opera': (appdata_roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (appdata_local, R'Vivaldi\User Data'),
        }
        browser_dir = os.path.join(*locations[browser_name])

    elif on_mac:
        appdata = os.path.expanduser('~/Library/Application Support')
        relative_dirs = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }
        browser_dir = os.path.join(appdata, relative_dirs[browser_name])

    else:
        raise ValueError(f'unsupported platform: {sys.platform}')

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_names = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Chromium',
        'opera': 'Chromium',
        'vivaldi': 'Chrome',
    }
    if on_mac:
        # On macOS these three use their own names instead of the shared ones
        keyring_names.update(edge='Microsoft Edge', opera='Opera', vivaldi='Vivaldi')

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_names[browser_name],
        'supports_profiles': browser_name not in browsers_without_profiles,
    }
217
218
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt all cookies of a Chromium-based browser.

    ``profile`` may be None, a path, or a profile name inside the browser
    directory. Cookies for which no cookie object could be produced are
    counted and reported, not added to the jar. Returns an empty jar when
    sqlite3 is unavailable; raises FileNotFoundError when no cookie database
    is found.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    supports_profiles = settings['supports_profiles']
    browser_root = settings['browser_dir']

    if profile is None:
        search_root = browser_root
    elif _is_path(profile):
        search_root = profile
        # With an explicit profile path, the browser root becomes its parent directory
        browser_root = os.path.dirname(profile) if supports_profiles else profile
    elif supports_profiles:
        search_root = os.path.join(browser_root, profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = browser_root

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(browser_root, settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Text columns are fetched as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)

            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor.cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
283
284
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the Chromium cookies table into a cookie object.

    The text fields arrive as bytes and are decoded here. A row counts as
    encrypted when its plain ``value`` is empty and ``encrypted_value`` is not.
    Returns ``(is_encrypted, cookie)``; ``cookie`` is None when the decryptor
    returned None.
    """
    host_key, name, value, path = (field.decode() for field in (host_key, name, value, path))

    # Truthy (the encrypted bytes themselves) only when there is no plain value
    is_encrypted = encrypted_value if not value else False

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    cookie = compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
302
303
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class for the per-platform Chromium cookie decryptors.

    Overview of the encryption schemes:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value; subclasses must override this."""
        raise NotImplementedError('Must be implemented by sub classes')

    @property
    def cookie_counts(self):
        """Per-version cookie counters; subclasses must override this."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 336
982ee69a 337
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor matching the current platform.

    ``browser_root`` is only used on Windows and ``keyring`` only on Linux.
    Raises NotImplementedError on any other platform.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}')
982ee69a
MB
347
348
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts v10 (fixed key) and v11 (keyring key) cookies on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._v10_key = self.derive_key(b'peanuts')
        keyring_password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        # Without a keyring password there is no v11 key
        self._v11_key = None if keyring_password is None else self.derive_key(keyring_password)
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    @property
    def cookie_counts(self):
        """Number of cookies seen so far, keyed by version prefix."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt one value; returns None for unknown versions or a missing v11 key."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version not in (b'v10', b'v11'):
            self._cookie_counts['other'] += 1
            return None

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        self._cookie_counts['v11'] += 1
        if self._v11_key is None:
            self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
385
386
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts v10 cookies on macOS; other values are returned unchanged."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        keychain_password = _get_mac_keyring_password(browser_keyring_name, logger)
        # Without a keychain password there is no v10 key
        self._v10_key = None if keychain_password is None else self.derive_key(keychain_password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    @property
    def cookie_counts(self):
        """Number of cookies seen so far, keyed by version prefix."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt one value; returns None when a v10 value has no key."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
421
422
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts cookies on Windows.

    v10 values are AES-GCM encrypted with the key obtained from
    ``_get_windows_v10_key``; every other value is passed to DPAPI.
    """

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @property
    def cookie_counts(self):
        """Number of cookies seen so far, keyed by version prefix."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value.

        Returns the decrypted value, or None when it cannot be decrypted
        (no v10 key available, or the DPAPI helper yielded None).
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the version prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        self._cookie_counts['other'] += 1
        # any other prefix means the data is DPAPI encrypted
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
        plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
        # NOTE(review): the DPAPI helper is defined outside this view and presumably
        # returns None on failure - confirm. Guarding here reports such a cookie as
        # undecryptable (like the v10 branch) instead of raising AttributeError.
        if plaintext is None:
            return None
        return plaintext.decode()
982ee69a
MB
462
463
def _extract_safari_cookies(profile, logger):
    """Read all cookies from Safari's ``Cookies.binarycookies`` file.

    Two locations are tried in order. Raises ValueError when not running on
    macOS and FileNotFoundError when neither location holds the file.
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    primary_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    if os.path.isfile(primary_path):
        cookies_path = primary_path
    else:
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookies_file:
        cookies_data = cookies_file.read()

    jar = parse_safari_cookies(cookies_data, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
484
485
class ParserError(Exception):
    """Raised by DataParser when the input cannot be read as requested."""
488
489
class DataParser:
    """Sequential reader over a bytes buffer that tracks a cursor position.

    All ``read_*`` and ``skip*`` methods advance ``cursor``; skipped bytes are
    reported through ``logger.debug``.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes; ParserError if negative or past the end."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and require them to match."""
        actual = self.read_bytes(len(expected_value))
        if actual != expected_value:
            raise ParserError(f'unexpected value: {actual} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless ``big_endian``)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(f'{byte_order}I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little-endian unless ``big_endian``)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(f'{byte_order}d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read up to and including the next NUL byte; return the decoded text before it."""
        # iter() keeps calling read_bytes(1) until it yields the NUL terminator
        chars = iter(lambda: self.read_bytes(1), b'\x00')
        return b''.join(chars).decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance by ``num_bytes`` bytes, logging the skipped content."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')

    def skip_to(self, offset, description='unknown'):
        """Advance the cursor to the absolute position ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Advance the cursor to the end of the data."""
        self.skip_to(len(self._data), description)
539
540
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds since 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
543
544
def _parse_safari_cookies_header(data, logger):
    """Parse the file header of a Safari binary cookies database.

    Returns ``(page_sizes, header_length)``: the list of page sizes and the
    cursor position after the header has been read.
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
551
552
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a Safari cookies database, adding its cookies to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record is parsed from its own slice; the page cursor is then moved past it
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
571
572
def _parse_safari_cookies_record(data, jar, logger):
    """Parse one cookie record and add the cookie to ``jar``.

    Returns the record size read from the record itself. A record whose
    strings cannot be decoded is skipped with a warning.
    """
    p = DataParser(data, logger)
    record_size = p.read_uint()
    p.skip(4, 'unknown record field 1')
    flags = p.read_uint()
    is_secure = bool(flags & 0x0001)
    p.skip(4, 'unknown record field 2')
    # Offsets of the four NUL-terminated strings, in the order they are stored in the header
    domain_offset = p.read_uint()
    name_offset = p.read_uint()
    path_offset = p.read_uint()
    value_offset = p.read_uint()
    p.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(p.read_double())
    _creation_date = _mac_absolute_time_to_posix(p.read_double())  # noqa: F841

    try:
        strings = []
        for offset in (domain_offset, name_offset, path_offset, value_offset):
            p.skip_to(offset)
            strings.append(p.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    p.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
613
614
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari binary cookies file into a cookie jar.

    A new YoutubeDLCookieJar is created when ``jar`` is None; the jar that
    was filled is returned.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    if jar is None:
        jar = YoutubeDLCookieJar()

    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
630
631
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished when choosing a Linux keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    # OTHER is the result when no known environment is detected
    OTHER = auto()
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
982ee69a
MB
644
645
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends a cookie encryption password can be looked up in.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()


# Names of all keyring backends, in definition order
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
657
658
def _get_linux_desktop_environment(env):
    """
    Detect the desktop environment from a mapping of environment variables.

    Only the first ':'-separated entry of XDG_CURRENT_DESKTOP is considered.
    DESKTOP_SESSION is the fallback, and the GNOME/KDE session marker
    variables are consulted only when DESKTOP_SESSION is not set.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)
    if xdg_current_desktop is not None:
        xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()

    if xdg_current_desktop == 'Unity':
        if desktop_session is not None and 'gnome-fallback' in desktop_session:
            return _LinuxDesktopEnvironment.GNOME
        return _LinuxDesktopEnvironment.UNITY

    by_xdg_name = {
        'GNOME': _LinuxDesktopEnvironment.GNOME,
        'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
        'KDE': _LinuxDesktopEnvironment.KDE,
        'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
        'XFCE': _LinuxDesktopEnvironment.XFCE,
    }
    if xdg_current_desktop in by_xdg_name:
        return by_xdg_name[xdg_current_desktop]

    if desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE
        return _LinuxDesktopEnvironment.OTHER

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    if 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
697
698
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the detected desktop environment.

    KDE maps to KWallet, an undetected environment to basic text, and every
    other environment to the GNOME keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
713
714
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Falls back to 'kdewallet' when the dbus query fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet',
    ]
    try:
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.decode().strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
744
745
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from KWallet using kwallet-query.

    Returns the password as bytes with one trailing newline removed, or b''
    when kwallet-query is missing, fails, reports that it could not read the
    entry, or raises.
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # Each fragment ends with a space so the joined message reads correctly
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        proc = Popen([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.error(f'kwallet-query failed with return code {proc.returncode}. Please consult '
                         'the kwallet-query man page for details')
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this may be a bug as the intended behaviour is to generate a random password and store
            # it, but that doesn't matter here.
            return b''

        logger.debug('password found')
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
789
790
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Look up the browser's "Safe Storage" secret in the default secretstorage collection.

    Returns b'' when secretstorage is unavailable or no item has the expected label.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        # Reached only when no item matched
        logger.error('failed to read from keyring')
        return b''
807
808
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Return the cookie encryption password from the selected Linux keyring.

    ``keyring`` is a _LinuxKeyring member name; when falsy the backend is
    chosen automatically. Returns None for the basic text backend.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        selected = _LinuxKeyring[keyring]
    else:
        selected = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {selected.name}')

    if selected == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if selected == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if selected == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {selected}'
827
828
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Run `security find-generic-password` to read the browser's keychain password.

    Returns the command's stdout with a single trailing newline removed, or
    None (after logging a warning) if running the command raised an exception.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        process = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        output, _ = process.communicate_or_kill()
        return output[:-1] if output[-1:] == b'\n' else output
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
846
847
def _get_windows_v10_key(browser_root, logger):
    """Read and decrypt the cookie encryption key from the browser's "Local State" file.

    Searches browser_root for the most recently used "Local State" file, takes
    the base64 value at os_crypt.encrypted_key, checks that it starts with the
    b'DPAPI' prefix and passes the remainder to _decrypt_windows_dpapi.
    Returns None (after logging an error) if the file or key is missing or the
    prefix does not match.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    dpapi_marker = b'DPAPI'
    wrapped_key = compat_b64decode(encoded_key)
    if wrapped_key.startswith(dpapi_marker):
        # strip the marker; only the bytes after it are the DPAPI blob
        return _decrypt_windows_dpapi(wrapped_key[len(dpapi_marker):], logger)
    logger.error('invalid key')
    return None
867
868
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key_length-byte key from password and salt using PBKDF2-HMAC-SHA1."""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
871
872
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """Decrypt ciphertext with AES-CBC, strip PKCS#7 padding and decode the result as text.

    Returns the decoded string, or None (with a one-time warning) if the
    decrypted bytes cannot be decoded.
    """
    decrypted = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    unpadded = unpad_pkcs7(decrypted)
    try:
        text = unpadded.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
880
881
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """Decrypt and verify ciphertext with AES-GCM, then decode the result as text.

    Returns the decoded string, or None (with a one-time warning) if the
    decryption helper raises ValueError or the plaintext cannot be decoded.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    # Decoding is kept in its own try block: UnicodeDecodeError is a subclass of
    # ValueError, so sharing one block would report the wrong failure reason.
    try:
        text = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
894
895
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt ciphertext by calling crypt32's CryptUnprotectData through ctypes.

    Returns the decrypted bytes, or None (with a one-time warning) when the
    call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into Python bytes before releasing the output buffer
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
926
927
928def _config_home():
929 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
930
931
932def _open_database_copy(database_path, tmpdir):
933 # cannot open sqlite databases if they are already in use (e.g. by the browser)
934 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
935 shutil.copy(database_path, database_copy_path)
936 conn = sqlite3.connect(database_copy_path)
937 return conn.cursor()
938
939
940def _get_column_names(cursor, table_name):
86e5f3ed 941 table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
0f06bcd7 942 return [row[1].decode() for row in table_info]
982ee69a
MB
943
944
def _find_most_recently_used_file(root, filename, logger):
    """Walk root and return the path of the most recently modified file named filename.

    Reports search progress through the progress bar created from logger.
    Returns None when no file with that name exists below root.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched = 0
    matches = []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry == filename:
                    matches.append(os.path.join(directory, entry))
    if not matches:
        return None
    return max(matches, key=lambda candidate: os.lstat(candidate).st_mtime)
956
957
def _merge_cookie_jars(jars):
    """Combine several cookie jars into one new YoutubeDLCookieJar.

    Cookies are added in iteration order. The merged jar takes the filename
    of the last input jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        if source_jar.filename is None:
            continue
        merged.filename = source_jar.filename
    return merged
966
967
968def _is_path(value):
969 return os.path.sep in value
970
971
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """Validate a browser specification and return it as (browser_name, profile, keyring).

    Raises ValueError when browser_name is not in SUPPORTED_BROWSERS or when
    keyring is neither None nor one of SUPPORTED_KEYRINGS. A profile that
    looks like a path has a leading '~' expanded.
    """
    browser_supported = browser_name in SUPPORTED_BROWSERS
    if not browser_supported:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is None or not _is_path(profile):
        return browser_name, profile, keyring
    return browser_name, os.path.expanduser(profile), keyring