]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[compat] Fix `compat.WINDOWS_VT_MODE`
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
2e4585da 10import time
982ee69a 11from datetime import datetime, timedelta, timezone
f59f5ef8 12from enum import Enum, auto
982ee69a
MB
13from hashlib import pbkdf2_hmac
14
1d3586d0 15from .aes import (
16 aes_cbc_decrypt_bytes,
17 aes_gcm_decrypt_and_verify_bytes,
18 unpad_pkcs7,
19)
f8271158 20from .compat import compat_b64decode, compat_cookiejar_Cookie
9b8ee23b 21from .dependencies import (
22 _SECRETSTORAGE_UNAVAILABLE_REASON,
23 secretstorage,
24 sqlite3,
25)
97ec5bc5 26from .minicurses import MultilinePrinter, QuietMultilinePrinter
f8271158 27from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
982ee69a 28
982ee69a
MB
# Browser names that extract_cookies_from_browser() routes to the Chromium
# extraction path (_extract_chrome_cookies).
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name accepted by extract_cookies_from_browser().
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
31
32
class YDLLogger:
    """Logger facade that forwards cookie-extraction messages to a ``ydl`` object.

    Every logging method is a no-op when no (truthy) ``ydl`` object was given.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward ``message`` to ``ydl.write_debug``."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Forward ``message`` to ``ydl.to_screen`` with a ``[Cookies]`` prefix."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward ``message`` and ``only_once`` to ``ydl.report_warning``."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward ``message`` to ``ydl.report_error``."""
        if not self._ydl:
            return
        self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        """Printer that drops updates arriving within ``_DELAY`` seconds of the last one."""

        _DELAY = 0.1
        _timer = 0

        def print(self, message):
            # Rate-limit: ignore the update if the previous one was too recent
            if time.time() - self._timer <= self._DELAY:
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method, or None. (Optional)"""
        ydl = self._ydl
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not ydl:
            return None
        if ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None

        stream = ydl._out_files.error
        try:
            interactive = bool(stream.isatty())
        except BaseException:
            return None
        if not interactive:
            return None
        return self.ProgressBar(stream, preserve_output=False)
97ec5bc5 73
74
def _create_progress_bar(logger):
    """Return the logger's progress printer, or a silent stand-in.

    If ``logger`` has a ``progress_bar`` attribute and calling it yields a
    truthy object, that object is returned. Otherwise a
    ``QuietMultilinePrinter`` whose ``print`` discards its argument is returned.
    """
    candidate = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if candidate:
        return candidate

    silent = QuietMultilinePrinter()
    silent.print = lambda _: None
    return silent
83
982ee69a
MB
84
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file            cookie file (path or other object accepted by
                                  YoutubeDLCookieJar), or None
    @param browser_specification  arguments for _parse_browser_specification, or None
    @param ydl                    object handed to YDLLogger for reporting
    @returns                      the result of _merge_cookie_jars over all loaded jars
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        is_filename = YoutubeDLCookieJar.is_path(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # Paths are only loaded when readable; non-path inputs are always loaded
        should_load = not is_filename or os.access(cookie_file, os.R_OK)
        if should_load:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
102
103
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch cookie extraction to the extractor matching ``browser_name``.

    @param keyring  only forwarded to the Chromium-based extractor
    @raises ValueError  if ``browser_name`` is not a known browser
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
113
114
def _extract_firefox_cookies(profile, logger):
    """Read all cookies from a Firefox ``cookies.sqlite`` database.

    @param profile  None (search the default Firefox directory), a path
                    (as judged by _is_path), or a profile name joined onto
                    the default Firefox directory
    @returns        a YoutubeDLCookieJar; empty if sqlite3 is unavailable
    @raises FileNotFoundError  if no cookies database is found
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as workdir:
        db_cursor = None
        try:
            # The query runs against a copy of the database made in workdir
            db_cursor = _open_database_copy(database_path, workdir)
            db_cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                rows = db_cursor.fetchall()
                row_count = len(rows)
                for index, row in enumerate(rows):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {index: 6d}/{row_count: 6d}')
                    jar.set_cookie(compat_cookiejar_Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if db_cursor is not None:
                db_cursor.connection.close()
156
157
def _firefox_browser_dir():
    """Return the platform-specific directory searched for Firefox profiles."""
    platform = sys.platform
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    if platform in ('cygwin', 'win32'):
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    # Every other platform uses the dot-directory in the user's home
    return os.path.expanduser('~/.mozilla/firefox')
982ee69a
MB
164
165
def _get_chromium_based_browser_settings(browser_name):
    """Return per-browser settings for a Chromium-based browser.

    @returns  dict with keys 'browser_dir' (user data directory for the
              current platform), 'keyring_name' and 'supports_profiles'
    @raises KeyError  if ``browser_name`` is not one of the known browsers
    """
    platform = sys.platform
    on_mac = platform == 'darwin'

    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    if platform in ('cygwin', 'win32'):
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        base, relative = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
        }[browser_name]
    elif on_mac:
        base = os.path.expanduser('~/Library/Application Support')
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]
    else:
        base = _config_home()
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]
    browser_dir = os.path.join(base, relative)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    shared_names = {'brave': 'Brave', 'chrome': 'Chrome', 'chromium': 'Chromium'}
    if on_mac:
        platform_names = {'edge': 'Microsoft Edge', 'opera': 'Opera', 'vivaldi': 'Vivaldi'}
    else:
        platform_names = {'edge': 'Chromium', 'opera': 'Chromium', 'vivaldi': 'Chrome'}
    keyring_name = {**shared_names, **platform_names}[browser_name]

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        # opera is the only listed browser without profile support
        'supports_profiles': browser_name not in {'opera'},
    }
220
221
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt all cookies of a Chromium-based browser.

    @param profile  None (search the browser directory), a path (as judged
                    by _is_path), or a profile name
    @param keyring  forwarded to get_cookie_decryptor
    @returns        a YoutubeDLCookieJar; empty if sqlite3 is unavailable
    @raises FileNotFoundError  if no cookies database is found
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)
    profiles_supported = config['supports_profiles']

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # An explicit path also relocates the browser directory handed to the decryptor
        config['browser_dir'] = os.path.dirname(profile) if profiles_supported else profile
    elif profiles_supported:
        search_root = os.path.join(config['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = config['browser_dir']

    database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as workdir:
        db_cursor = None
        try:
            db_cursor = _open_database_copy(database_path, workdir)
            # Text columns are fetched as raw bytes; _process_chrome_cookie decodes them
            db_cursor.connection.text_factory = bytes
            columns = _get_column_names(db_cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in columns else 'secure'
            db_cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')

            jar = YoutubeDLCookieJar()
            undecryptable = 0
            plaintext = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = db_cursor.fetchall()
                row_count = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{row_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        undecryptable += 1
                        continue
                    if not is_encrypted:
                        plaintext += 1
                    jar.set_cookie(cookie)

            failed_message = f' ({undecryptable} could not be decrypted)' if undecryptable > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = {**decryptor._cookie_counts, 'unencrypted': plaintext}
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if db_cursor is not None:
                db_cursor.connection.close()
286
287
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the Chromium ``cookies`` table into a cookie object.

    ``host_key``, ``name``, ``value`` and ``path`` are bytes and get decoded.
    When ``value`` is empty and ``encrypted_value`` is not, the value is
    obtained from ``decryptor.decrypt``.

    @returns  (is_encrypted, cookie); cookie is None if decryption returned None
    """
    host_key, name, value, path = (
        field.decode() for field in (host_key, name, value, path))

    # Truthy only for rows whose value lives solely in the encrypted column
    is_encrypted = not value and encrypted_value
    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    cookie = compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
305
306
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class of the per-platform Chromium cookie decryptors.

    Encryption schemes by platform:

    Linux:
    - cookies are either v10 or v11
        - v10: AES-CBC encrypted with a fixed key
        - v11: AES-CBC encrypted with an OS protected key (keyring)
        - v11 keys can be stored in various places depending on the active desktop environment [2]

    Mac:
    - cookies are either v10 or not v10
        - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
        - not v10: 'old data' stored as plaintext

    Windows:
    - cookies are either v10 or not v10
        - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
        - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Per-version tally of cookies seen by decrypt(); subclasses replace it per instance
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value; must be overridden by subclasses."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 337
982ee69a 338
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Construct the cookie decryptor matching the current platform.

    ``browser_root`` is only used on Windows, ``browser_keyring_name`` only on
    Mac and Linux, and ``keyring`` only on Linux.
    """
    platform = sys.platform
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
982ee69a
MB
345
346
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10 (fixed key) and v11 (keyring key) cookies on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 cookies use a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        keyring_password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        if keyring_password is None:
            self._v11_key = None
        else:
            self._v11_key = self.derive_key(keyring_password)
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value; return None if it cannot be decrypted."""
        prefix, payload = encrypted_value[:3], encrypted_value[3:]

        if prefix == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(payload, self._v10_key, self._logger)

        if prefix == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(payload, self._v11_key, self._logger)

        # Unknown version prefix
        self._cookie_counts['other'] += 1
        return None
379
380
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10 (keyring key) cookies on Mac; other values pass through."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        keyring_password = _get_mac_keyring_password(browser_keyring_name, logger)
        if keyring_password is None:
            self._v10_key = None
        else:
            self._v10_key = self.derive_key(keyring_password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value; return None if the v10 key is missing."""
        prefix, payload = encrypted_value[:3], encrypted_value[3:]

        if prefix != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(payload, self._v10_key, self._logger)
411
412
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10 (AES-GCM) and DPAPI-encrypted cookies on Windows."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value; return None if the v10 key is missing."""
        prefix, payload = encrypted_value[:3], encrypted_value[3:]

        if prefix != b'v10':
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
        #   kNonceLength
        nonce_length = 96 // 8
        # boringssl
        #   EVP_AEAD_AES_GCM_TAG_LEN
        authentication_tag_length = 16

        # Payload layout: nonce, then ciphertext, then authentication tag
        nonce = payload[:nonce_length]
        authentication_tag = payload[-authentication_tag_length:]
        ciphertext = payload[nonce_length:-authentication_tag_length]

        return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
982ee69a
MB
448
449
def _extract_safari_cookies(profile, logger):
    """Read all cookies from Safari's ``Cookies.binarycookies`` file.

    @param profile  must be None; any other value is reported as an error
    @raises ValueError         if the platform is not darwin
    @raises FileNotFoundError  if neither known cookie location exists
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    primary = '~/Library/Cookies/Cookies.binarycookies'
    secondary = '~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies'

    cookies_path = os.path.expanduser(primary)
    if not os.path.isfile(cookies_path):
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser(secondary)
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as stream:
        raw = stream.read()

    jar = parse_safari_cookies(raw, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
470
471
class ParserError(Exception):
    """Raised by DataParser when the input cannot be read as requested."""
474
475
class DataParser:
    """Sequential reader over an in-memory bytes buffer.

    ``cursor`` is the offset of the next unread byte. Skipped bytes are
    reported through ``logger.debug``.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor.

        Raises ParserError for a negative count or a read past the end.
        """
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes; raise ParserError on mismatch."""
        actual = self.read_bytes(len(expected_value))
        if actual == expected_value:
            return
        raise ParserError(f'unexpected value: {actual} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless ``big_endian``)."""
        (number,) = struct.unpack('>I' if big_endian else '<I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8-byte double (little-endian unless ``big_endian``)."""
        (number,) = struct.unpack('>d' if big_endian else '<d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read bytes up to and including the next NUL; return them decoded, without the NUL."""
        # Pull single bytes until the NUL terminator is returned
        single_bytes = iter(lambda: self.read_bytes(1), b'\x00')
        return b''.join(single_bytes).decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes and log them; raise ParserError if negative."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped!r}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward until the cursor is at ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the buffer."""
        self.skip_to(len(self._data), description)
525
526
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds since 2001-01-01 00:00 UTC into integer POSIX seconds."""
    reference_date = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = reference_date + timedelta(seconds=timestamp)
    return int(moment.timestamp())
529
530
def _parse_safari_cookies_header(data, logger):
    """Parse the header of a Safari binary cookies file.

    @returns  (page_sizes, offset of the first byte after the header)
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
537
538
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a Safari binary cookies file and add its cookies to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record parser works on its own slice; afterwards advance this parser past it
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
557
558
def _parse_safari_cookies_record(data, jar, logger):
    """Parse one cookie record starting at ``data[0]`` and add it to ``jar``.

    @returns  the record size read from the record's first field; a record
              whose strings fail to decode is skipped with a warning
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    # Offsets are stored in the order: domain, name, path, value
    string_offsets = [parser.read_uint() for _ in range(4)]
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    strings = []
    try:
        for string_offset in string_offsets:
            parser.skip_to(string_offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
599
600
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari binary cookies file into a cookie jar.

    A new YoutubeDLCookieJar is created when ``jar`` is None.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure are the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
616
617
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments returned by _get_linux_desktop_environment().

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    OTHER = auto()  # returned when no known environment is detected
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
982ee69a
MB
630
631
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends handled by _get_linux_keyring_password().

    Member names double as the user-facing keyring names (see SUPPORTED_KEYRINGS).

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()  # no keyring password is looked up for this backend
640
641
# Names accepted as a keyring selection: the member names of _LinuxKeyring
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
643
644
def _get_linux_desktop_environment(env):
    """
    Detect the desktop environment from a mapping of environment variables.

    XDG_CURRENT_DESKTOP is consulted first (only its first ':'-separated
    entry), then DESKTOP_SESSION, then the GNOME/KDE session marker variables.
    Returns _LinuxDesktopEnvironment.OTHER when nothing matches.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)
    if xdg_current_desktop is not None:
        # Only the first entry of a colon-separated list is considered
        xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()

        if xdg_current_desktop == 'Unity':
            # A Unity desktop running a gnome-fallback session is treated as GNOME
            if desktop_session is not None and 'gnome-fallback' in desktop_session:
                return _LinuxDesktopEnvironment.GNOME
            else:
                return _LinuxDesktopEnvironment.UNITY
        elif xdg_current_desktop == 'GNOME':
            return _LinuxDesktopEnvironment.GNOME
        elif xdg_current_desktop == 'X-Cinnamon':
            return _LinuxDesktopEnvironment.CINNAMON
        elif xdg_current_desktop == 'KDE':
            return _LinuxDesktopEnvironment.KDE
        elif xdg_current_desktop == 'Pantheon':
            return _LinuxDesktopEnvironment.PANTHEON
        elif xdg_current_desktop == 'XFCE':
            return _LinuxDesktopEnvironment.XFCE
    elif desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        elif 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE
    else:
        # Neither variable is set: fall back to session marker variables
        if 'GNOME_DESKTOP_SESSION_ID' in env:
            return _LinuxDesktopEnvironment.GNOME
        elif 'KDE_FULL_SESSION' in env:
            return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
683
684
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the detected desktop environment.

    KDE selects KWALLET, an undetected environment selects BASICTEXT and
    everything else selects GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    detected = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {detected.name}')
    if detected == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if detected == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
699
700
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Asks kwalletd5 over dbus (via ``dbus-send``); falls back to 'kdewallet'
    when the command fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    fallback = 'kdewallet'
    try:
        output, _, exit_code = Popen.run([
            'dbus-send', '--session', '--print-reply=literal',
            '--dest=org.kde.kwalletd5',
            '/modules/kwalletd5',
            'org.kde.KWallet.networkWallet'
        ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if exit_code:
            logger.warning('failed to read NetworkWallet')
            return fallback

        wallet_name = output.strip()
        logger.debug(f'NetworkWallet = "{wallet_name}"')
        return wallet_name
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return fallback
728
729
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from KWallet via ``kwallet-query``.

    @param browser_keyring_name  keyring name of the browser; used to build the
                                 entry name ("<name> Safe Storage") and the
                                 folder name ("<name> Keys")
    @param logger                object with debug/warning/error methods
    @returns  the password as bytes, or b'' if kwallet-query is missing,
              fails, raises, or reports that the entry could not be read
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # NB: each fragment must end in a space, since adjacent literals are joined verbatim
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this may be a bug as the intended behaviour is to generate a random password and store
                # it, but that doesn't matter here.
                return b''
            else:
                logger.debug('password found')
                return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
770
771
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" secret via ``secretstorage``.

    @returns  the secret of the first item in the default collection labelled
              "<browser_keyring_name> Safe Storage"; b'' if secretstorage is
              unavailable or no item matches
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for entry in collection.get_all_items():
            if entry.get_label() == wanted_label:
                return entry.get_secret()
        # No item carried the expected label
        logger.error('failed to read from keyring')
        return b''
788
789
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Fetch the browser's keyring password from the selected Linux backend.

    @param keyring  name of a _LinuxKeyring member, or a falsy value to
                    choose one from the detected desktop environment
    @returns        the password (bytes), or None for the BASICTEXT backend
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    assert False, f'Unknown keyring {keyring}'
808
809
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Ask the `security` command line tool for the "<browser_keyring_name> Safe Storage"
    generic password.

    @param browser_keyring_name  used as the account and to build the service name
    @param logger                logger used for debug output and warnings
    @returns                     the command's stdout with trailing newlines stripped,
                                 or None if anything raised while running it
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        output, _, _ = Popen.run(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        return output.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
823
824
def _get_windows_v10_key(browser_root, logger):
    """
    Read the encrypted key from the browser's "Local State" file and decrypt it with DPAPI.

    @param browser_root  directory searched for the most recently used "Local State" file
    @param logger        logger used for debug output and errors
    @returns             the result of _decrypt_windows_dpapi on the key, or None if the
                         file is missing, holds no key, or the key lacks the DPAPI prefix
    """
    path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{path}"')

    with open(path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    dpapi_marker = b'DPAPI'
    wrapped_key = compat_b64decode(encoded_key)
    if wrapped_key.startswith(dpapi_marker):
        # strip the marker; only the remainder is passed to DPAPI
        return _decrypt_windows_dpapi(wrapped_key[len(dpapi_marker):], logger)

    logger.error('invalid key')
    return None
844
845
def pbkdf2_sha1(password, salt, iterations, key_length):
    """
    Derive a key with PBKDF2 using HMAC-SHA1.

    @param password    password bytes
    @param salt        salt bytes
    @param iterations  number of iterations
    @param key_length  length of the derived key in bytes
    @returns           the derived key as bytes
    """
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
848
849
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt an AES-CBC encrypted value, strip the PKCS#7 padding and decode it as text.

    @param ciphertext             encrypted bytes
    @param key                    AES key
    @param logger                 logger used to warn about decoding failures
    @param initialization_vector  IV for CBC mode; defaults to 16 space characters
    @returns                      the decoded string, or None if the plaintext does not decode
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    raw = unpad_pkcs7(padded)
    try:
        text = raw.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
857
858
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and verify an AES-GCM encrypted value and decode it as text.

    @param ciphertext          encrypted bytes
    @param key                 AES key
    @param nonce               GCM nonce
    @param authentication_tag  GCM authentication tag
    @param logger              logger used to warn about failures
    @returns                   the decoded string, or None if the decrypt-and-verify step
                               raises ValueError or the plaintext does not decode
    """
    # The two steps stay in separate try blocks: UnicodeDecodeError is a subclass
    # of ValueError, so a shared handler would report the wrong failure.
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        text = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
871
872
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt data with the Windows CryptUnprotectData function.

    @param ciphertext  bytes to decrypt
    @param logger      logger used to warn when the call fails
    @returns           the decrypted bytes, or None if CryptUnprotectData reports failure

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before releasing the output buffer
    decrypted = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return decrypted
903
904
def _config_home():
    """Return $XDG_CONFIG_HOME if it is set, otherwise the expanded `~/.config`."""
    try:
        return os.environ['XDG_CONFIG_HOME']
    except KeyError:
        return os.path.expanduser('~/.config')
907
908
def _open_database_copy(database_path, tmpdir):
    """
    Copy an sqlite database into tmpdir and open the copy.

    @param database_path  path of the database file to copy
    @param tmpdir         directory that receives the copy (as `temporary.sqlite`)
    @returns              a cursor on a connection to the copied database
    """
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    connection = sqlite3.connect(copy_path)
    cursor = connection.cursor()
    return cursor
915
916
def _get_column_names(cursor, table_name):
    """
    Return the column names of a table, in the order reported by `PRAGMA table_info`.

    @param cursor      sqlite3 cursor to run the query on
    @param table_name  name of the table; interpolated directly into the PRAGMA
                       statement, so it must come from trusted code
    @returns           list of column names as str
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # Column 1 of each row is the column name. It arrives as bytes when the
    # connection's text_factory is bytes and as str with the default factory,
    # so only decode when needed.
    return [
        name.decode() if isinstance(name, bytes) else name
        for name in (row[1] for row in table_info)
    ]
982ee69a
MB
920
921
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk `root` and return the most recently modified file named `filename`.

    @param root      directory to search recursively
    @param filename  exact file name to look for
    @param logger    logger passed to _create_progress_bar for progress output
    @returns         path of the match with the greatest lstat mtime, or None if there is none
    """
    # if there are multiple browser profiles, take the most recently used one
    i = 0
    matches = []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, filenames in os.walk(root):
            for candidate in filenames:
                i += 1
                progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
                if candidate == filename:
                    matches.append(os.path.join(directory, candidate))
    if not matches:
        return None
    return max(matches, key=lambda path: os.lstat(path).st_mtime)
933
934
def _merge_cookie_jars(jars):
    """
    Combine several cookie jars into a new YoutubeDLCookieJar.

    Cookies are added in iteration order. The resulting jar's filename is taken
    from the last input jar whose filename is not None.

    @param jars  iterable of cookie jars
    @returns     a new YoutubeDLCookieJar holding the cookies of all input jars
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        source_filename = source_jar.filename
        if source_filename is not None:
            merged.filename = source_filename
    return merged
943
944
def _is_path(value):
    """
    Tell whether `value` looks like a filesystem path rather than a bare name.

    A value counts as a path if it contains the platform's path separator or,
    where one exists, its alternative separator (`os.path.altsep`, which is
    '/' on Windows and None on POSIX).

    @param value  string to inspect
    @returns      True if any path separator occurs in value, else False
    """
    return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
947
948
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """
    Validate a browser specification and normalise its profile.

    @param browser_name  must be one of SUPPORTED_BROWSERS
    @param profile       profile name or path; a path has `~` expanded
    @param keyring       None or one of SUPPORTED_KEYRINGS
    @returns             tuple of (browser_name, profile, keyring)
    @raises ValueError   if the browser or the keyring is not supported
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    # a bare profile name is passed through untouched; only paths are expanded
    if profile is None or not _is_path(profile):
        return browser_name, profile, keyring
    return browser_name, os.path.expanduser(profile), keyring