]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
Don't imply `-s` for later stages of `-O`
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
10from datetime import datetime, timedelta, timezone
f59f5ef8 11from enum import Enum, auto
982ee69a
MB
12from hashlib import pbkdf2_hmac
13
1d3586d0 14from .aes import (
15 aes_cbc_decrypt_bytes,
16 aes_gcm_decrypt_and_verify_bytes,
17 unpad_pkcs7,
18)
f8271158 19from .compat import compat_b64decode, compat_cookiejar_Cookie
97ec5bc5 20from .minicurses import MultilinePrinter, QuietMultilinePrinter
f8271158 21from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
982ee69a 22
767b02a9
MB
23try:
24 import sqlite3
25 SQLITE_AVAILABLE = True
26except ImportError:
27 # although sqlite3 is part of the standard library, it is possible to compile python without
28 # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
29 SQLITE_AVAILABLE = False
30
31
982ee69a 32try:
f59f5ef8
MB
33 import secretstorage
34 SECRETSTORAGE_AVAILABLE = True
982ee69a 35except ImportError:
f59f5ef8
MB
36 SECRETSTORAGE_AVAILABLE = False
37 SECRETSTORAGE_UNAVAILABLE_REASON = (
38 'as the `secretstorage` module is not installed. '
39 'Please install by running `python3 -m pip install secretstorage`.')
063c409d 40except Exception as _err:
f59f5ef8
MB
41 SECRETSTORAGE_AVAILABLE = False
42 SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
982ee69a
MB
43
44
# Browser names that are routed to the Chrome cookie extraction code path.
CHROMIUM_BASED_BROWSERS = {
    'brave',
    'chrome',
    'chromium',
    'edge',
    'opera',
    'vivaldi',
}
# Every browser name accepted for cookie extraction.
SUPPORTED_BROWSERS = {'firefox', 'safari'} | CHROMIUM_BASED_BROWSERS
47
48
class YDLLogger:
    """Logger facade that forwards messages to a YoutubeDL-like object.

    When constructed without a ``ydl`` object, every logging method is a
    no-op and ``progress_bar`` returns ``None``.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward ``message`` to ``ydl.write_debug``."""
        if self._ydl:
            self._ydl.write_debug(message)

    def info(self, message):
        """Print ``message`` via ``ydl.to_screen`` with a ``[Cookies]`` prefix."""
        if self._ydl:
            self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward ``message`` and ``only_once`` to ``ydl.report_warning``."""
        if self._ydl:
            self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward ``message`` to ``ydl.report_error``."""
        if self._ydl:
            self._ydl.report_error(message)

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        Returns ``None`` when no progress output should be shown.
        """
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
            return
        file = self._ydl._out_files['error']
        try:
            if not file.isatty():
                return
        except Exception:
            # A stream that cannot answer isatty() is treated as "not a terminal".
            # Only ordinary errors are swallowed, so KeyboardInterrupt and
            # SystemExit still propagate.
            return

        printer = MultilinePrinter(file, preserve_output=False)
        printer.print = lambda message: printer.print_at_line(f'[Cookies] {message}', 0)
        return printer
84
85
def _create_progress_bar(logger):
    """Return the logger's progress printer, or a silent stand-in.

    The logger's optional ``progress_bar()`` result is used when it is truthy.
    Otherwise a ``QuietMultilinePrinter`` whose ``print`` discards its argument
    is returned, so callers can always use the result the same way.
    """
    bar = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if bar:
        return bar

    silent = QuietMultilinePrinter()
    silent.print = lambda _: None
    return silent
94
982ee69a
MB
95
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file            path of a cookie file, or None
    @param browser_specification  arguments for _parse_browser_specification, or None
    @param ydl                    object wrapped in a YDLLogger for browser extraction
    @returns                      the result of merging all collected jars
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        cookie_file = expand_path(cookie_file)
        file_jar = YoutubeDLCookieJar(cookie_file)
        # The jar is kept even when the file is unreadable; it is simply not loaded
        if os.access(cookie_file, os.R_OK):
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
110
111
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch cookie extraction to the implementation for ``browser_name``.

    ``keyring`` is only passed on to the Chromium-based extractor.
    Raises ValueError for a browser name that is not recognised.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
121
122
def _extract_firefox_cookies(profile, logger):
    """Read all rows of ``moz_cookies`` from a Firefox profile into a cookie jar.

    ``profile`` may be None (search the whole Firefox directory), a path, or a
    profile name relative to the Firefox directory. Returns an empty jar when
    sqlite3 is unavailable and raises FileNotFoundError when no
    ``cookies.sqlite`` file can be found.
    """
    logger.info('Extracting cookies from firefox')
    if not SQLITE_AVAILABLE:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # The query runs against a copy of the database made in tmpdir
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                row_count = len(rows)
                for index, row in enumerate(rows):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {index: 6d}/{row_count: 6d}')
                    jar.set_cookie(compat_cookiejar_Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
164
165
def _firefox_browser_dir():
    """Return the Firefox data directory for the current platform.

    Raises ValueError on platforms other than Linux, Windows and macOS.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return os.path.expanduser('~/.mozilla/firefox')
    if platform == 'win32':
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a
MB
175
176
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a browser.

    The result is a dict with the keys ``browser_dir``, ``keyring_name`` and
    ``supports_profiles``. Raises ValueError on an unsupported platform and
    KeyError for a browser name missing from the tables below.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    if sys.platform in ('linux', 'linux2'):
        config_root = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]
        browser_dir = os.path.join(config_root, relative_dir)

    elif sys.platform == 'win32':
        local_root = os.path.expandvars('%LOCALAPPDATA%')
        roaming_root = os.path.expandvars('%APPDATA%')
        # Opera is the only entry located under the roaming directory
        base_dir, relative_dir = {
            'brave': (local_root, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local_root, R'Google\Chrome\User Data'),
            'chromium': (local_root, R'Chromium\User Data'),
            'edge': (local_root, R'Microsoft\Edge\User Data'),
            'opera': (roaming_root, R'Opera Software\Opera Stable'),
            'vivaldi': (local_root, R'Vivaldi\User Data'),
        }[browser_name]
        browser_dir = os.path.join(base_dir, relative_dir)

    elif sys.platform == 'darwin':
        support_root = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]
        browser_dir = os.path.join(support_root, relative_dir)

    else:
        raise ValueError(f'unsupported platform: {sys.platform}')

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = sys.platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }[browser_name]

    browsers_without_profiles = {'opera'}
    settings = {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles,
    }
    return settings
234
235
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt the cookies of a Chromium-based browser into a jar.

    ``profile`` may be None, a path, or a profile name. ``keyring`` is passed on
    to the cookie decryptor. Returns an empty jar when sqlite3 is unavailable
    and raises FileNotFoundError when no ``Cookies`` database is found. Rows
    for which no cookie could be produced are counted and left out of the jar.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not SQLITE_AVAILABLE:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # For browsers with profiles the browser directory is the profile's parent
        config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
    elif config['supports_profiles']:
        search_root = os.path.join(config['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = config['browser_dir']

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Text columns are fetched as bytes and decoded per cookie later
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                row_count = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{row_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor.cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
300
301
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the Chrome ``cookies`` table into a cookie object.

    The text columns arrive as bytes and are decoded as UTF-8. Returns a pair
    ``(is_encrypted, cookie)``; ``cookie`` is None when decryption yields None.
    """
    host_key, name, value, path = (
        raw.decode('utf-8') for raw in (host_key, name, value, path))
    # Truthy only when the plain value is empty and an encrypted blob is present
    is_encrypted = not value and encrypted_value

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    cookie = compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
319
320
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class of the per-platform Chrome cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value; subclasses must override this."""
        raise NotImplementedError('Must be implemented by sub classes')

    @property
    def cookie_counts(self):
        """Per-version cookie counters; subclasses must override this."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 353
982ee69a 354
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the Chrome cookie decryptor matching the current platform.

    ``keyring`` is only used on Linux and ``browser_root`` only on Windows.
    Raises NotImplementedError on any other platform.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}')
982ee69a
MB
364
365
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts the ``v10`` and ``v11`` cookie values used on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 cookies use a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        keyring_password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        if keyring_password is None:
            self._v11_key = None
        else:
            self._v11_key = self.derive_key(keyring_password)
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key for ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    @property
    def cookie_counts(self):
        """Number of cookies seen so far, keyed by version prefix."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt ``encrypted_value``; return None when it cannot be handled."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        # Any other prefix is counted and left undecrypted
        self._cookie_counts['other'] += 1
        return None
402
403
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts the ``v10`` cookie values used on macOS."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        keyring_password = _get_mac_keyring_password(browser_keyring_name, logger)
        if keyring_password is None:
            self._v10_key = None
        else:
            self._v10_key = self.derive_key(keyring_password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key for ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    @property
    def cookie_counts(self):
        """Number of cookies seen so far, keyed by version prefix."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt a ``v10`` value; any other value is returned unchanged."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            # NOTE(review): the input object is handed back without decoding - confirm callers expect that
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
438
439
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts the ``v10`` (AES-GCM) and DPAPI cookie values used on Windows."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @property
    def cookie_counts(self):
        """Number of cookies seen so far, keyed by version prefix."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt ``encrypted_value``; return None when the v10 key is missing."""
        version, payload = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            # NOTE(review): assumes _decrypt_windows_dpapi always returns bytes - confirm its failure result
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8')

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
        #   kNonceLength
        nonce_length = 96 // 8
        # boringssl
        #   EVP_AEAD_AES_GCM_TAG_LEN
        authentication_tag_length = 16

        # Payload layout: nonce, then ciphertext, then authentication tag
        nonce = payload[:nonce_length]
        ciphertext = payload[nonce_length:-authentication_tag_length]
        authentication_tag = payload[-authentication_tag_length:]

        return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
479
480
def _extract_safari_cookies(profile, logger):
    """Load the Safari ``Cookies.binarycookies`` file into a cookie jar.

    A non-None ``profile`` is reported as an error but otherwise ignored.
    Raises ValueError when not running on macOS and FileNotFoundError when
    neither known cookie file location exists.
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    primary_missing = not os.path.isfile(cookies_path)
    if primary_missing:
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookie_file:
        raw_cookies = cookie_file.read()

    jar = parse_safari_cookies(raw_cookies, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
501
502
class ParserError(Exception):
    """Raised when binary data is truncated or does not have the expected form."""


class DataParser:
    """Sequential reader over a bytes object.

    ``cursor`` is the offset of the next unread byte. Every read advances it;
    reading past the end of the data raises ParserError.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor.

        Raises ParserError for a negative count or when too few bytes remain.
        """
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes; raise ParserError if they differ."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless ``big_endian``)."""
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little endian unless ``big_endian``)."""
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        """Read a NUL terminated UTF-8 string and move past its terminator.

        Raises ParserError when no terminator remains; the cursor is then left
        at the end of the data. A UnicodeDecodeError from invalid UTF-8 is
        raised only after the cursor has moved past the terminator.
        """
        start = self.cursor
        # One search for the terminator instead of one read_bytes() call per byte
        terminator = self._data.find(b'\x00', start)
        if terminator < 0:
            self.cursor = len(self._data)
            raise ParserError('reached end of input')
        self.cursor = terminator + 1
        return self._data[start:terminator].decode('utf-8')

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes and log them at debug level.

        A count of zero does nothing; a negative count raises ParserError.
        """
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        """Skip forward until the cursor equals ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left of the data."""
        self.skip_to(len(self._data), description)
556
557
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
560
561
def _parse_safari_cookies_header(data, logger):
    """Parse the header of a Safari cookie database.

    Returns ``(page_sizes, header_length)``: the list of page sizes read from
    the header and the offset at which the header ends.
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
568
569
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a Safari cookie database, adding its cookies to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record is parsed from its own slice; the page parser then steps over it
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
588
589
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of ``data`` and add it to ``jar``.

    Returns the record size read from the record. If one of the strings is not
    valid UTF-8, a warning is logged and no cookie is added.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    # Offsets are stored in the order domain, name, path, value
    string_offsets = [parser.read_uint() for _ in range(4)]
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # The creation date is read to advance the parser; its value is not used
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        strings = []
        for offset in string_offsets:
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
630
631
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari cookie database into ``jar``.

    A new YoutubeDLCookieJar is created when ``jar`` is None. Returns the jar.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
647
648
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments that are told apart when choosing a keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    OTHER = auto()
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()


class _LinuxKeyring(Enum):
    """
    Keyring backends a cookie encryption password can be read from.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()


# Names of the keyring backends, in declaration order
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()


def _get_linux_desktop_environment(env):
    """
    Detect the desktop environment from the environment mapping ``env``.

    The sources are consulted in this order and the first match wins:
    every colon separated entry of XDG_CURRENT_DESKTOP, then DESKTOP_SESSION,
    then the legacy GNOME_DESKTOP_SESSION_ID / KDE_FULL_SESSION variables.
    Returns _LinuxDesktopEnvironment.OTHER when nothing matches.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        known_desktops = {
            'GNOME': _LinuxDesktopEnvironment.GNOME,
            'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
            'KDE': _LinuxDesktopEnvironment.KDE,
            'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
            'XFCE': _LinuxDesktopEnvironment.XFCE,
        }
        # The variable may hold several names (e.g. "ubuntu:GNOME"), so every
        # entry is checked rather than only the first one
        for part in map(str.strip, xdg_current_desktop.split(':')):
            if part == 'Unity':
                if desktop_session is not None and 'gnome-fallback' in desktop_session:
                    return _LinuxDesktopEnvironment.GNOME
                return _LinuxDesktopEnvironment.UNITY
            if part in known_desktops:
                return known_desktops[part]

    if desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        elif 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE

    # Legacy variables are checked whenever nothing above matched, including
    # when DESKTOP_SESSION is set to an unrecognised value
    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
714
715
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend for the desktop environment detected from os.environ.

    KDE maps to KWALLET, an unrecognised environment to BASICTEXT and every
    other environment to GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
730
731
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    The name is queried with ``dbus-send``; ``'kdewallet'`` is returned when the
    query exits with a non-zero code or raises an exception.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet',
    ]
    try:
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.decode('utf-8').strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
761
762
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from KWallet via ``kwallet-query``.

    Returns the password as bytes with one trailing newline removed. Returns
    ``b''`` when ``kwallet-query`` is not installed, exits with a non-zero code,
    reports that it failed to read the entry, or raises an exception.
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # Each fragment ends with a space so the joined message reads correctly
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        proc = Popen([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.error(f'kwallet-query failed with return code {proc.returncode}. Please consult '
                         'the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this may be a bug as the intended behaviour is to generate a random password and store
                # it, but that doesn't matter here.
                return b''
            else:
                logger.debug('password found')
                if stdout[-1:] == b'\n':
                    stdout = stdout[:-1]
                return stdout
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
806
807
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Look up the browser's "Safe Storage" secret in the default secretstorage collection.

    Returns ``b''`` after logging an error when the ``secretstorage`` module is
    unavailable or no item with the expected label exists.
    """
    if not SECRETSTORAGE_AVAILABLE:
        logger.error(f'secretstorage not available {SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        # Reached only when no item carried the wanted label
        logger.error('failed to read from keyring')
        return b''
824
825
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Fetch the browser's "Safe Storage" password from the selected Linux keyring.

    @param browser_keyring_name  name the browser uses for its keyring entries
    @param keyring               name of a _LinuxKeyring member; if falsy the
                                 keyring is picked by _choose_linux_keyring
    @param logger                logger used for debug output
    @returns                     the value returned by the keyring-specific helper,
                                 or None when the basic text store is in use
    @raises AssertionError       if the chosen keyring is not handled here
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    # Raised explicitly instead of `assert False`: assert statements are removed
    # under `python -O`, which would make an unknown keyring fall through and
    # return None, i.e. be indistinguishable from BASICTEXT.
    raise AssertionError(f'Unknown keyring {keyring}')
844
845
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the browser's "Safe Storage" password by running
    `security find-generic-password`.

    @returns  the password as bytes (one trailing newline removed), or None if
              the command exited with a non-zero status or could not be run
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        proc = Popen(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            # Without this check a failed lookup yields empty stdout, which would
            # be returned as if b'' were the stored password.
            logger.warning(f'find-generic-password failed with return code {proc.returncode}')
            return None
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
863
864
def _get_windows_v10_key(browser_root, logger):
    """
    Locate the most recently used "Local State" file under `browser_root`,
    read `os_crypt.encrypted_key` from it and decrypt that key with DPAPI.

    Returns the result of _decrypt_windows_dpapi, or None (after logging an
    error) if the file or key is missing or the key lacks the DPAPI prefix.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    # the decoded key must start with a literal marker, which is stripped
    # before the remainder is handed to DPAPI
    dpapi_marker = b'DPAPI'
    raw_key = compat_b64decode(encoded_key)
    if raw_key.startswith(dpapi_marker):
        return _decrypt_windows_dpapi(raw_key[len(dpapi_marker):], logger)

    logger.error('invalid key')
    return None
884
885
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive `key_length` bytes from `password` and `salt` using PBKDF2 with HMAC-SHA1"""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
888
889
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    AES-CBC decrypt `ciphertext`, strip the PKCS#7 padding and decode as UTF-8.

    Returns the decoded string, or None (after a one-time warning) if the
    decrypted bytes are not valid UTF-8.
    """
    decrypted = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    unpadded = unpad_pkcs7(decrypted)
    try:
        decoded = unpadded.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        decoded = None
    return decoded
897
898
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    AES-GCM decrypt and verify `ciphertext`, then decode the result as UTF-8.

    Returns the decoded string, or None (after a one-time warning) if either
    the verification or the UTF-8 decoding fails.
    """
    # The two steps are guarded separately: UnicodeDecodeError is itself a
    # ValueError, so a single handler could not tell the failures apart.
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        decoded = decrypted.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        decoded = None
    return decoded
911
912
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling crypt32's CryptUnprotectData through ctypes.

    Returns the decrypted bytes, or None (after a one-time warning) if the
    call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    input_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    output_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(input_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(output_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output out of the returned buffer before releasing that buffer
    plaintext = ctypes.string_at(output_blob.pbData, output_blob.cbData)
    ctypes.windll.kernel32.LocalFree(output_blob.pbData)
    return plaintext
943
944
945def _config_home():
946 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
947
948
949def _open_database_copy(database_path, tmpdir):
950 # cannot open sqlite databases if they are already in use (e.g. by the browser)
951 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
952 shutil.copy(database_path, database_copy_path)
953 conn = sqlite3.connect(database_copy_path)
954 return conn.cursor()
955
956
957def _get_column_names(cursor, table_name):
86e5f3ed 958 table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
982ee69a
MB
959 return [row[1].decode('utf-8') for row in table_info]
960
961
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk `root` and return the path of the file named `filename` with the
    newest modification time, or None if no such file exists.

    Progress is reported through the progress bar created for `logger`.
    """
    # if there are multiple browser profiles, take the most recently used one
    matches = []
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _subdirs, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry != filename:
                    continue
                matches.append(os.path.join(directory, entry))

    if not matches:
        return None
    return max(matches, key=lambda candidate: os.lstat(candidate).st_mtime)
973
974
def _merge_cookie_jars(jars):
    """
    Combine the cookies of all `jars` into a new YoutubeDLCookieJar.

    The merged jar takes the filename of the last jar that has one.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        if source_jar.filename is None:
            continue
        merged.filename = source_jar.filename
    return merged
983
984
985def _is_path(value):
986 return os.path.sep in value
987
988
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """
    Validate a browser specification and normalise the profile.

    Raises ValueError if `browser_name` is not in SUPPORTED_BROWSERS or if
    `keyring` is neither None nor in SUPPORTED_KEYRINGS. A profile that looks
    like a path has `~` expanded; any other profile is passed through.

    Returns the tuple (browser_name, profile, keyring).
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    accepted_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in accepted_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    looks_like_path = profile is not None and _is_path(profile)
    resolved_profile = os.path.expanduser(profile) if looks_like_path else profile
    return browser_name, resolved_profile, keyring