]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[cleanup] Misc cleanup
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
2e4585da 10import time
982ee69a 11from datetime import datetime, timedelta, timezone
f59f5ef8 12from enum import Enum, auto
982ee69a
MB
13from hashlib import pbkdf2_hmac
14
1d3586d0 15from .aes import (
16 aes_cbc_decrypt_bytes,
17 aes_gcm_decrypt_and_verify_bytes,
18 unpad_pkcs7,
19)
f8271158 20from .compat import compat_b64decode, compat_cookiejar_Cookie
9b8ee23b 21from .dependencies import (
22 _SECRETSTORAGE_UNAVAILABLE_REASON,
23 secretstorage,
24 sqlite3,
25)
97ec5bc5 26from .minicurses import MultilinePrinter, QuietMultilinePrinter
f8271158 27from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
982ee69a 28
982ee69a
MB
# Browser names that extract_cookies_from_browser() routes to the Chromium cookie extractor.
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name known to this module: the Chromium family plus firefox and safari.
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
31
32
class YDLLogger:
    """Logger facade that forwards messages to a YoutubeDL-like object.

    If no ``ydl`` object was supplied, every logging method silently does nothing.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        if not self._ydl:
            return
        self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        """Single-line progress printer that drops updates arriving within ``_DELAY`` seconds."""
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            # Rate limit: skip the update if the previous one was printed too recently
            if time.time() - self._timer <= self._DELAY:
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        ydl = self._ydl
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not ydl or ydl.params.get('noprogress') or ydl.params.get('logger'):
            return
        stream = ydl._out_files['error']
        try:
            is_terminal = stream.isatty()
        except BaseException:
            return
        if not is_terminal:
            return
        return self.ProgressBar(stream, preserve_output=False)
97ec5bc5 73
74
def _create_progress_bar(logger):
    """Return the logger's progress printer, or a silent stand-in.

    The logger's ``progress_bar()`` result is used when the logger has that method and
    it returns something truthy. Otherwise a QuietMultilinePrinter whose ``print``
    discards its argument is returned.
    """
    printer = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if printer:
        return printer
    silent_printer = QuietMultilinePrinter()
    silent_printer.print = lambda _: None
    return silent_printer
83
982ee69a
MB
84
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file            Cookie file (path or other object accepted by
                                  YoutubeDLCookieJar), or None
    @param browser_specification  Arguments for _parse_browser_specification, or None
    @param ydl                    Object wrapped in a YDLLogger for reporting
    @returns                      The result of merging all collected jars
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        is_filename = YoutubeDLCookieJar.is_path(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # A path that cannot be read is not loaded, but its (empty) jar is still kept
        should_load = not is_filename or os.access(cookie_file, os.R_OK)
        if should_load:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
102
103
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch cookie extraction to the extractor for ``browser_name``.

    ``keyring`` is only passed on to the Chromium extractor.
    Raises ValueError for a browser name that is not recognised.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
113
114
def _extract_firefox_cookies(profile, logger):
    """Read all cookies from a firefox ``cookies.sqlite`` database into a jar.

    ``profile`` may be None (search the whole firefox directory), a path, or a
    profile name relative to the firefox directory. Returns an empty jar when
    sqlite3 is unavailable; raises FileNotFoundError when no database is found.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is not None and _is_path(profile):
        search_root = profile
    else:
        firefox_dir = _firefox_browser_dir()
        search_root = firefox_dir if profile is None else os.path.join(firefox_dir, profile)

    database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # Queries run against a copy of the database placed in the temporary directory
            cursor = _open_database_copy(database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    jar.set_cookie(compat_cookiejar_Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
156
157
def _firefox_browser_dir():
    """Return the platform-specific firefox data directory.

    Raises ValueError when sys.platform is not linux, win32 or darwin.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return os.path.expanduser('~/.mozilla/firefox')
    if platform == 'win32':
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a
MB
167
168
def _get_chromium_based_browser_settings(browser_name):
    """Return per-browser settings for a Chromium based browser on this platform.

    The result is a dict with the keys ``browser_dir`` (user data directory),
    ``keyring_name`` and ``supports_profiles``.
    Raises ValueError on an unsupported platform and KeyError for a browser name
    missing from the tables below.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        base_dir = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]

    elif platform == 'win32':
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        base_dir, relative_dir = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
        }[browser_name]

    elif platform == 'darwin':
        base_dir = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]

    else:
        raise ValueError(f'unsupported platform: {sys.platform}')

    browser_dir = os.path.join(base_dir, relative_dir)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    is_mac = platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if is_mac else 'Chromium',
        'opera': 'Opera' if is_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if is_mac else 'Chrome',
    }[browser_name]

    browsers_without_profiles = {'opera'}
    supports_profiles = browser_name not in browsers_without_profiles

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': supports_profiles,
    }
226
227
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt all cookies from a Chromium based browser's ``Cookies`` database.

    ``profile`` may be None, a path, or a profile name. Cookies that cannot be
    decrypted are counted and left out of the jar. Returns an empty jar when
    sqlite3 is unavailable; raises FileNotFoundError when no database is found.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)
    supports_profiles = config['supports_profiles']

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # With profile support the given path is a profile inside the browser directory
        config['browser_dir'] = os.path.dirname(profile) if supports_profiles else profile
    elif supports_profiles:
        search_root = os.path.join(config['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = config['browser_dir']

    database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            # Text columns are returned as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = {**decryptor._cookie_counts, 'unencrypted': unencrypted_cookies}
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
292
293
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the Chromium cookies table into a cookie object.

    The text fields arrive as bytes and are decoded here. A row counts as
    encrypted when its plain ``value`` is empty and ``encrypted_value`` is not.

    @returns  (is_encrypted, cookie); cookie is None when decryption failed
    """
    host_key, name, value, path = (field.decode() for field in (host_key, name, value, path))
    is_encrypted = not value and encrypted_value

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    cookie = compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
311
312
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class for the platform specific Chromium cookie decryptors.

    Subclasses implement decrypt() and keep a per-version tally of the cookies
    they were asked to decrypt in the `_cookie_counts` dict.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # This must be a plain attribute, not a read-only property: a property is a data
    # descriptor, so a subclass doing `self._cookie_counts = {...}` in __init__ would
    # raise AttributeError. Subclasses are expected to replace this empty default
    # with their own per-instance dict rather than mutate it.
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt the raw ``encrypted_value`` of a cookie. Must be overridden."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 345
982ee69a 346
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor matching the current platform.

    ``keyring`` is only used on Linux and ``browser_root`` only on Windows.
    Raises NotImplementedError on any other platform.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}')
982ee69a
MB
356
357
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10 (fixed key) and v11 (keyring key) cookies on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._v10_key = self.derive_key(b'peanuts')
        keyring_password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        # Without a keyring password there is no v11 key and v11 cookies cannot be decrypted
        self._v11_key = self.derive_key(keyring_password) if keyring_password is not None else None
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a cookie value; the first 3 bytes select the key. Returns None on failure."""
        prefix, payload = encrypted_value[:3], encrypted_value[3:]

        if prefix == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(payload, self._v10_key, self._logger)

        if prefix == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(payload, self._v11_key, self._logger)

        self._cookie_counts['other'] += 1
        return None
390
391
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10 cookies on macOS; any other value is returned unchanged."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        keychain_password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = self.derive_key(keychain_password) if keychain_password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a cookie value. Returns None when a v10 cookie cannot be decrypted."""
        prefix, payload = encrypted_value[:3], encrypted_value[3:]

        if prefix != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(payload, self._v10_key, self._logger)
422
423
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10 (AES-GCM) cookies on Windows; other values go through DPAPI."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Decrypt a cookie value. Returns None when no v10 key is available."""
        prefix, payload = encrypted_value[:3], encrypted_value[3:]

        if prefix != b'v10':
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
        #   kNonceLength
        nonce_length = 96 // 8
        # boringssl
        #   EVP_AEAD_AES_GCM_TAG_LEN
        authentication_tag_length = 16

        # The payload is laid out as: nonce | ciphertext | authentication tag
        nonce = payload[:nonce_length]
        ciphertext = payload[nonce_length:-authentication_tag_length]
        authentication_tag = payload[-authentication_tag_length:]

        return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
982ee69a
MB
459
460
def _extract_safari_cookies(profile, logger):
    """Read and parse safari's ``Cookies.binarycookies`` file into a jar.

    A non-None ``profile`` is reported as an error and otherwise ignored.
    Raises ValueError off darwin and FileNotFoundError when neither known
    location holds the cookie file.
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    if not os.path.isfile(cookies_path):
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookies_file:
        raw_cookies = cookies_file.read()

    jar = parse_safari_cookies(raw_cookies, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
481
482
class ParserError(Exception):
    """Raised by DataParser when the binary input does not match what is being read."""
485
486
class DataParser:
    """Sequential reader over a bytes buffer that tracks the position in ``cursor``.

    All read/skip methods advance ``cursor`` and raise ParserError when the
    request is negative or runs past the end of the data.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Read ``len(expected_value)`` bytes and raise ParserError unless they match."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless ``big_endian``)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(byte_order + 'I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little endian unless ``big_endian``)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(byte_order + 'd', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read up to and including the next NUL byte; return the decoded text before it."""
        collected = []
        byte = self.read_bytes(1)
        while byte != b'\x00':
            collected.append(byte)
            byte = self.read_bytes(1)
        return b''.join(collected).decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance by ``num_bytes`` and log the skipped bytes; zero is a no-op."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor lands on ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip whatever remains of the data."""
        self.skip_to(len(self._data), description)
536
537
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
540
541
def _parse_safari_cookies_header(data, logger):
    """Parse the file header of a safari cookie database.

    @returns  (page_sizes, offset of the first byte after the header)
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
548
549
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a safari cookie database and add its cookies to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = []
    for _ in range(number_of_cookies):
        record_offsets.append(parser.read_uint())

    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record parser works on its own slice; advance this parser by the size it reports
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
568
569
def _parse_safari_cookies_record(data, jar, logger):
    """Parse a single cookie record starting at ``data[0]`` and add it to ``jar``.

    A record whose strings cannot be decoded is skipped with a warning.

    @returns  The record size read from the record header
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    # Offsets are stored in this order: domain, name, path, value
    string_offsets = [parser.read_uint() for _ in range(4)]
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # The creation date is not used, but it has to be read to advance the parser
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        strings = []
        for offset in string_offsets:
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
610
611
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a safari cookie database into ``jar`` (a new jar if None).

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    if jar is None:
        jar = YoutubeDLCookieJar()
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
627
628
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished when choosing a Linux keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    # Returned by _get_linux_desktop_environment when nothing else matches
    OTHER = auto()
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
982ee69a
MB
641
642
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends that a Linux cookie password can be read from.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    # No keyring password is looked up for this backend (see _get_linux_keyring_password)
    BASICTEXT = auto()


# Names of the _LinuxKeyring members, i.e. the strings accepted by `_LinuxKeyring[name]`
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
654
655
def _get_linux_desktop_environment(env):
    """
    Detect the desktop environment from a mapping of environment variables.

    XDG_CURRENT_DESKTOP is consulted first (only its first ':' separated entry),
    then DESKTOP_SESSION, then the GNOME/KDE session marker variables.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        desktop = xdg_current_desktop.split(':')[0].strip()
        if desktop == 'Unity':
            is_gnome_fallback = desktop_session is not None and 'gnome-fallback' in desktop_session
            if is_gnome_fallback:
                return _LinuxDesktopEnvironment.GNOME
            return _LinuxDesktopEnvironment.UNITY
        known_desktops = {
            'GNOME': _LinuxDesktopEnvironment.GNOME,
            'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
            'KDE': _LinuxDesktopEnvironment.KDE,
            'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
            'XFCE': _LinuxDesktopEnvironment.XFCE,
        }
        return known_desktops.get(desktop, _LinuxDesktopEnvironment.OTHER)

    if desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE
        return _LinuxDesktopEnvironment.OTHER

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    if 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
694
695
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the desktop environment detected via os.environ:
    KDE uses KWALLET, an unrecognised environment BASICTEXT, anything else GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
710
711
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Falls back to 'kdewallet' when the dbus query fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet'
    ]
    try:
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, stderr = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.decode().strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
741
742
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from KWallet using kwallet-query.

    @param browser_keyring_name  Name used to build the entry ("<name> Safe Storage")
                                 and the folder ("<name> Keys") to query
    @param logger                Logger used for debug/warning/error reporting
    @returns                     The password as bytes with one trailing newline removed,
                                 or b'' when kwallet-query is missing, fails, reports
                                 that the entry could not be read, or raises
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # Each fragment needs its own trailing space: adjacent literals are joined as-is
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        proc = Popen([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.error(f'kwallet-query failed with return code {proc.returncode}. Please consult '
                         'the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this may be a bug as the intended behaviour is to generate a random password and store
                # it, but that doesn't matter here.
                return b''
            else:
                logger.debug('password found')
                if stdout[-1:] == b'\n':
                    stdout = stdout[:-1]
                return stdout
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
786
787
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" secret from the default secretstorage collection.

    Returns b'' when secretstorage is unavailable or no item carries the expected label.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    wanted_label = f'{browser_keyring_name} Safe Storage'
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        logger.error('failed to read from keyring')
        return b''
804
805
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Fetch the cookie encryption password from the selected Linux keyring.

    ``keyring`` is the name of a _LinuxKeyring member; when falsy the keyring is
    chosen automatically. Returns None for the BASICTEXT backend.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {keyring}'
824
825
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Obtain the browser's "Safe Storage" password via `security find-generic-password`.

    @returns  The password as bytes (with one trailing newline removed), or None
              if the command fails or cannot be run (a warning is logged).
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        proc = Popen(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        # A non-zero exit status means nothing useful was written to stdout.
        # Report the failure rather than returning b'' as if it were the password.
        # NOTE(review): assumes the project's Popen exposes subprocess.Popen.returncode - confirm
        if proc.returncode:
            logger.warning('find-generic-password failed')
            return None
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
843
844
def _get_windows_v10_key(browser_root, logger):
    """Load and decrypt the cookie encryption key stored in the browser's "Local State" file.

    Returns the result of _decrypt_windows_dpapi on the stored key, or None
    (after logging an error) when the file is missing, has no
    os_crypt.encrypted_key entry, or the key lacks the b'DPAPI' prefix.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    dpapi_prefix = b'DPAPI'
    raw_key = compat_b64decode(encoded_key)
    if raw_key.startswith(dpapi_prefix):
        # Only the bytes after the prefix are handed to DPAPI
        return _decrypt_windows_dpapi(raw_key[len(dpapi_prefix):], logger)

    logger.error('invalid key')
    return None
864
865
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of key_length bytes with PBKDF2 using HMAC-SHA1."""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
868
869
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """Decrypt ciphertext with AES-CBC, strip the PKCS#7 padding and decode the result.

    Returns the decoded string, or None (after a one-time warning) when the
    decrypted bytes cannot be decoded.
    """
    decrypted = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    unpadded = unpad_pkcs7(decrypted)
    try:
        text = unpadded.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
877
878
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """Decrypt and authenticate ciphertext with AES-GCM, then decode the result.

    Returns the decoded string, or None (after a one-time warning) when
    verification raises ValueError or the plaintext cannot be decoded.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        text = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
891
892
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt ciphertext with the Windows CryptUnprotectData API.

    Returns the decrypted bytes, or None (after a one-time warning) when the
    API call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()

    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob),  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # Copy the output into a Python bytes object before releasing the output buffer
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
923
924
def _config_home():
    """Return the user's configuration directory.

    Uses $XDG_CONFIG_HOME when it is set to a non-empty value and falls back
    to ~/.config otherwise. An empty value is treated the same as an unset
    one, as the XDG Base Directory Specification requires.
    """
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
927
928
def _open_database_copy(database_path, tmpdir):
    """Copy the database into tmpdir as 'temporary.sqlite' and return a cursor on the copy."""
    # cannot open sqlite databases if they are already in use (e.g. by the browser),
    # so work on a private copy instead
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
935
936
def _get_column_names(cursor, table_name):
    """Return the column names of table_name as a list of str.

    The names are read from `PRAGMA table_info`. Depending on the
    connection's text_factory they arrive as bytes or as str; bytes are
    decoded and str values are passed through unchanged, so the function
    works with either setting.
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # Index 1 of each table_info row holds the column name
    return [
        row[1].decode() if isinstance(row[1], bytes) else row[1]
        for row in table_info
    ]
982ee69a
MB
940
941
def _find_most_recently_used_file(root, filename, logger):
    """Walk root and return the most recently modified file named filename.

    Progress is reported through the progress bar created for logger.
    Returns None when no file with that name is found.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched, matches = 0, []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _subdirs, names in os.walk(root):
            for name in names:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if name == filename:
                    matches.append(os.path.join(directory, name))

    if not matches:
        return None
    return max(matches, key=lambda candidate: os.lstat(candidate).st_mtime)
953
954
def _merge_cookie_jars(jars):
    """Combine several cookie jars into one new YoutubeDLCookieJar.

    Cookies are copied in iteration order. The merged jar takes the filename
    of the last input jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        source_filename = source_jar.filename
        if source_filename is not None:
            merged.filename = source_filename
    return merged
963
964
def _is_path(value):
    """Return True if value contains a path separator.

    Both os.path.sep and, where the platform defines one, os.path.altsep are
    recognised, so that e.g. 'C:/Users/me/profile' counts as a path on Windows.
    """
    # os.path.altsep is None on platforms with a single separator
    separators = (os.path.sep, os.path.altsep)
    return any(sep in value for sep in separators if sep)
967
968
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """Validate a browser specification and return it as (browser_name, profile, keyring).

    Raises ValueError for a browser not in SUPPORTED_BROWSERS or a keyring
    that is neither None nor in SUPPORTED_KEYRINGS. A profile containing a
    path separator has a leading '~' expanded.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    # Only profiles given as paths are expanded; plain profile names are passed through
    looks_like_path = profile is not None and _is_path(profile)
    if looks_like_path:
        profile = os.path.expanduser(profile)

    return browser_name, profile, keyring