]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[Hotstar] Bugfix for a1ddaa899ca8693f31f34770f7263ace7e8c8841
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
2e4585da 10import time
982ee69a 11from datetime import datetime, timedelta, timezone
f59f5ef8 12from enum import Enum, auto
982ee69a
MB
13from hashlib import pbkdf2_hmac
14
1d3586d0 15from .aes import (
16 aes_cbc_decrypt_bytes,
17 aes_gcm_decrypt_and_verify_bytes,
18 unpad_pkcs7,
19)
f8271158 20from .compat import compat_b64decode, compat_cookiejar_Cookie
9b8ee23b 21from .dependencies import (
22 _SECRETSTORAGE_UNAVAILABLE_REASON,
23 secretstorage,
24 sqlite3,
25)
97ec5bc5 26from .minicurses import MultilinePrinter, QuietMultilinePrinter
f8271158 27from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
982ee69a 28
982ee69a
MB
# Browser names routed to the Chromium extraction path.
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name this module can extract cookies from.
SUPPORTED_BROWSERS = {'firefox', 'safari'} | CHROMIUM_BASED_BROWSERS
31
32
class YDLLogger:
    """Logging facade that forwards messages to an optional ``ydl`` object.

    When no ``ydl`` is given, every logging method silently does nothing and
    ``progress_bar`` returns None.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Pass ``message`` to ``ydl.write_debug``."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Print ``message`` through ``ydl.to_screen`` with a ``[Cookies]`` prefix."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Pass ``message`` and ``only_once`` to ``ydl.report_warning``."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Pass ``message`` to ``ydl.report_error``."""
        if not self._ydl:
            return
        self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        """Printer that rewrites line 0 and drops updates arriving too quickly."""

        # _DELAY: minimum number of seconds between two printed updates
        # _timer: time.time() value recorded after the last printed update
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            elapsed = time.time() - self._timer
            if elapsed > self._DELAY:
                self.print_at_line(f'[Cookies] {message}', 0)
                self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        ydl = self._ydl
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not ydl:
            return None
        if ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None
        stream = ydl._out_files['error']
        try:
            interactive = stream.isatty()
        except BaseException:
            # Any failure while probing the stream means "no progress bar"
            return None
        if not interactive:
            return None
        return self.ProgressBar(stream, preserve_output=False)
97ec5bc5 73
74
def _create_progress_bar(logger):
    """Return the logger's progress printer, or a silent stand-in.

    If ``logger`` has a ``progress_bar`` attribute it is called and a truthy
    result is returned as-is. Otherwise a ``QuietMultilinePrinter`` whose
    ``print`` ignores its argument is returned.
    """
    printer = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not printer:
        printer = QuietMultilinePrinter()
        printer.print = lambda _: None
    return printer
83
982ee69a
MB
84
def load_cookies(cookie_file, browser_specification, ydl):
    """Collect cookies from a browser and/or a cookie file into one merged jar.

    @param cookie_file            None, or a value handed to YoutubeDLCookieJar;
                                  when ``YoutubeDLCookieJar.is_path`` accepts it,
                                  it is first expanded with ``expand_path``
    @param browser_specification  None, or an iterable unpacked into
                                  ``_parse_browser_specification``
    @param ydl                    object wrapped in a YDLLogger for browser extraction
    @returns                      the result of ``_merge_cookie_jars``
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        is_filename = YoutubeDLCookieJar.is_path(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # Non-path inputs are always loaded; paths only when they are readable
        should_load = (not is_filename) or os.access(cookie_file, os.R_OK)
        if should_load:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
102
103
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch cookie extraction to the handler for ``browser_name``.

    ``keyring`` is only forwarded to the Chromium handler.

    Raises ValueError when ``browser_name`` is not 'firefox', 'safari' or a
    member of CHROMIUM_BASED_BROWSERS.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
113
114
def _extract_firefox_cookies(profile, logger):
    """Load every row of ``moz_cookies`` from a Firefox profile into a new jar.

    ``profile`` may be None (search the default Firefox directory), a path
    (as judged by ``_is_path``), or a name joined onto the default directory.
    Without sqlite3 support a warning is logged and an empty jar is returned.

    Raises FileNotFoundError when no ``cookies.sqlite`` is found.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    else:
        if _is_path(profile):
            search_root = profile
        else:
            search_root = os.path.join(_firefox_browser_dir(), profile)

    database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    jar.set_cookie(compat_cookiejar_Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            # The connection is closed whether or not extraction succeeded
            if cursor is not None:
                cursor.connection.close()
156
157
def _firefox_browser_dir():
    """Return the platform-specific directory searched for Firefox profiles.

    Raises ValueError on platforms other than Linux, Windows and macOS.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return os.path.expanduser('~/.mozilla/firefox')
    if platform == 'win32':
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a
MB
167
168
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a browser.

    @returns  dict with keys 'browser_dir', 'keyring_name', 'supports_profiles'

    Raises ValueError on an unsupported platform and KeyError for a
    ``browser_name`` missing from the lookup tables.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        config = _config_home()
        subdirs = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }
        browser_dir = os.path.join(config, subdirs[browser_name])

    elif platform == 'win32':
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        # Opera is the only entry rooted in the roaming directory
        locations = {
            'brave': (appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (appdata_local, R'Google\Chrome\User Data'),
            'chromium': (appdata_local, R'Chromium\User Data'),
            'edge': (appdata_local, R'Microsoft\Edge\User Data'),
            'opera': (appdata_roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (appdata_local, R'Vivaldi\User Data'),
        }
        browser_dir = os.path.join(*locations[browser_name])

    elif platform == 'darwin':
        appdata = os.path.expanduser('~/Library/Application Support')
        subdirs = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }
        browser_dir = os.path.join(appdata, subdirs[browser_name])

    else:
        raise ValueError(f'unsupported platform: {sys.platform}')

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = platform == 'darwin'
    keyring_names = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }
    keyring_name = keyring_names[browser_name]

    browsers_without_profiles = {'opera'}
    supports_profiles = browser_name not in browsers_without_profiles

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': supports_profiles,
    }
226
227
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Load and decrypt the cookies of a Chromium-based browser into a new jar.

    ``profile`` may be None (search the browser directory), a path (as judged
    by ``_is_path``), or a profile name for browsers that support profiles.
    ``keyring`` is forwarded to ``get_cookie_decryptor``. Cookies that cannot
    be decrypted are counted and left out of the jar. Without sqlite3 support
    a warning is logged and an empty jar is returned.

    Raises FileNotFoundError when no ``Cookies`` database is found.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # A profile path lives inside the browser directory when profiles are supported
        if config['supports_profiles']:
            config['browser_dir'] = os.path.dirname(profile)
        else:
            config['browser_dir'] = profile
    elif config['supports_profiles']:
        search_root = os.path.join(config['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = config['browser_dir']

    database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            # Text columns come back as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            # The secure flag column is named either 'is_secure' or 'secure'
            if 'is_secure' in column_names:
                secure_column = 'is_secure'
            else:
                secure_column = 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor.cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            # The connection is closed whether or not extraction succeeded
            if cursor is not None:
                cursor.connection.close()
292
293
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the Chromium ``cookies`` table into a cookie object.

    ``host_key``, ``name``, ``value`` and ``path`` arrive as bytes and are
    decoded here. A row counts as encrypted when its plain ``value`` is empty
    and ``encrypted_value`` is not.

    @returns  (is_encrypted, cookie) where ``is_encrypted`` is a bool and
              ``cookie`` is None when ``decryptor.decrypt`` returned None
    """
    host_key = host_key.decode()
    name = name.decode()
    value = value.decode()
    path = path.decode()
    # Coerce to bool so the flag never carries the raw encrypted bytes
    is_encrypted = bool(not value and encrypted_value)

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    return is_encrypted, compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})
311
312
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Interface implemented by the per-platform Chromium cookie decryptors.

    Overview of the formats handled by the subclasses:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value; subclasses must override."""
        raise NotImplementedError('Must be implemented by sub classes')

    @property
    def cookie_counts(self):
        """Per-version counters of the values seen; subclasses must override."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 345
982ee69a 346
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the ChromeCookieDecryptor subclass matching ``sys.platform``.

    ``browser_root`` is only used on Windows, ``browser_keyring_name`` on
    Linux and macOS, and ``keyring`` on Linux.

    Raises NotImplementedError on any other platform.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}')
982ee69a
MB
356
357
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for Linux: handles values prefixed with ``v10`` or ``v11``."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 values use a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        # v11 values use a key derived from the keyring password, if one is available
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = dict.fromkeys(('v10', 'v11', 'other'), 0)

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    @property
    def cookie_counts(self):
        """Number of values seen so far, keyed by 'v10', 'v11' and 'other'."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt a value by its 3-byte version prefix.

        Returns None for a v11 value when no v11 key is available and for
        any unrecognised prefix.
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        self._cookie_counts['other'] += 1
        return None
394
395
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for macOS: ``v10`` values are decrypted, others pass through."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = dict.fromkeys(('v10', 'other'), 0)

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    @property
    def cookie_counts(self):
        """Number of values seen so far, keyed by 'v10' and 'other'."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt a ``v10`` value, or return any other value unchanged.

        Returns None for a v10 value when no key is available.
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
430
431
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for Windows: ``v10`` values use AES-GCM, others go to DPAPI."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @property
    def cookie_counts(self):
        """Number of values seen so far, keyed by 'v10' and 'other'."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt a value by its 3-byte version prefix.

        Returns None for a v10 value when no key is available, and None when
        the DPAPI helper yields no plaintext.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): the helper is defined outside this view and presumably
            # returns None on failure; guard so one bad value cannot raise AttributeError
            if plaintext is None:
                return None
            return plaintext.decode()
982ee69a
MB
471
472
def _extract_safari_cookies(profile, logger):
    """Read Safari's ``Cookies.binarycookies`` file and parse it into a jar.

    A non-None ``profile`` is reported as an error but otherwise ignored.

    Raises ValueError off macOS and FileNotFoundError when neither known
    location contains the cookie file.
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    primary_exists = os.path.isfile(cookies_path)

    if not primary_exists:
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookie_file:
        raw = cookie_file.read()

    jar = parse_safari_cookies(raw, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
493
494
class ParserError(Exception):
    """Raised by DataParser on an invalid read, skip or unexpected value."""
497
498
class DataParser:
    """Sequential reader over a bytes-like buffer.

    ``cursor`` is the offset of the next unread byte. Reads and skips raise
    ParserError when they are negative or run past the end of the data.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, end = self.cursor, self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = end
        return self._data[start:end]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and require them to match."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless ``big_endian``)."""
        data_format = '>I' if big_endian else '<I'
        (number,) = struct.unpack(data_format, self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little-endian unless ``big_endian``)."""
        data_format = '>d' if big_endian else '<d'
        (number,) = struct.unpack(data_format, self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read bytes up to and including a NUL; return the decoded text before it."""
        # iter() keeps calling read_bytes(1) until it returns the NUL sentinel
        characters = iter(lambda: self.read_bytes(1), b'\x00')
        return b''.join(characters).decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance past ``num_bytes`` bytes, logging what was skipped."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped!r}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor lands on ``offset``."""
        distance = offset - self.cursor
        self.skip(distance, description)

    def skip_to_end(self, description='unknown'):
        """Skip whatever remains of the buffer."""
        self.skip_to(len(self._data), description)
548
549
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
552
553
def _parse_safari_cookies_header(data, logger):
    """Parse the header of a Safari binary cookies file.

    @returns  (page_sizes, body_start): the big-endian page sizes listed in
              the header and the offset of the first byte after the header
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
560
561
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a Safari binary cookies file, adding its records to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    cookie_count = parser.read_uint()
    offsets = []
    for _ in range(cookie_count):
        offsets.append(parser.read_uint())
    if cookie_count == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{cookie_count: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record is parsed from its own slice; then this parser steps over it
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
580
581
def _parse_safari_cookies_record(data, jar, logger):
    """Parse one cookie record starting at ``data[0]`` and add it to ``jar``.

    A record whose strings fail to decode is skipped with a warning.

    @returns  the record size read from the record's first field
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    # Offsets of the four NUL-terminated strings, relative to the record start
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # The creation date is read (and converted) only to move past it
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        parser.skip_to(domain_offset)
        domain = parser.read_cstring()

        parser.skip_to(name_offset)
        name = parser.read_cstring()

        parser.skip_to(path_offset)
        path = parser.read_cstring()

        parser.skip_to(value_offset)
        value = parser.read_cstring()
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
622
623
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari binary cookies file into ``jar``.

    A new YoutubeDLCookieJar is created when ``jar`` is None; the jar is returned.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    target = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for page_size in page_sizes:
        page = body.read_bytes(page_size)
        _parse_safari_cookies_page(page, target, logger)
    body.skip_to_end('footer')
    return target
639
640
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished when selecting a keyring backend.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """

    OTHER = auto()
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
982ee69a
MB
653
654
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends that can hold the cookie encryption password on Linux.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """

    KWALLET = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()


# Names accepted when a keyring is chosen explicitly
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
666
667
def _get_linux_desktop_environment(env):
    """
    Classify the desktop environment described by the mapping ``env``.

    ``XDG_CURRENT_DESKTOP`` decides when it is set; ``DESKTOP_SESSION`` is
    consulted only when it is not; the two session marker variables are
    consulted only when neither is set. Anything unrecognised is OTHER.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        # Only the first colon-separated entry is considered
        desktop = xdg_current_desktop.split(':')[0].strip()
        if desktop == 'Unity':
            is_gnome_fallback = desktop_session is not None and 'gnome-fallback' in desktop_session
            if is_gnome_fallback:
                return _LinuxDesktopEnvironment.GNOME
            return _LinuxDesktopEnvironment.UNITY
        by_name = {
            'GNOME': _LinuxDesktopEnvironment.GNOME,
            'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
            'KDE': _LinuxDesktopEnvironment.KDE,
            'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
            'XFCE': _LinuxDesktopEnvironment.XFCE,
        }
        return by_name.get(desktop, _LinuxDesktopEnvironment.OTHER)

    if desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE
        return _LinuxDesktopEnvironment.OTHER

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    if 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
706
707
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the desktop environment found in ``os.environ``.

    KDE maps to KWALLET, OTHER to BASICTEXT and everything else to GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
722
723
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Asks kwalletd5 over dbus (via ``dbus-send``) and falls back to
    'kdewallet' when the call fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet'
    ]
    try:
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.decode().strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
753
754
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the '<browser> Safe Storage' password from KWallet with ``kwallet-query``.

    @returns  the password as bytes with one trailing newline removed, or
              b'' when kwallet-query is missing, exits non-zero, reports
              that it failed to read the entry, or raises
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # Each literal ends in a space so the joined message reads correctly
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        proc = Popen([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, stderr = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.error(f'kwallet-query failed with return code {proc.returncode}. Please consult '
                         'the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this may be a bug as the intended behaviour is to generate a random password and store
                # it, but that doesn't matter here.
                return b''
            else:
                logger.debug('password found')
                # Drop the single newline kwallet-query appends to its output
                if stdout[-1:] == b'\n':
                    stdout = stdout[:-1]
                return stdout
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
798
799
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Find the '<browser> Safe Storage' secret in the default secretstorage collection.

    @returns  the item's secret, or b'' when secretstorage is unavailable
              or no item carries that label
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    wanted_label = f'{browser_keyring_name} Safe Storage'
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        # Reached only when no item matched
        logger.error('failed to read from keyring')
        return b''
816
817
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Obtain the browser's keyring password from the selected Linux keyring backend.

    @param keyring  name of a _LinuxKeyring member; when falsy the backend is
                    picked by _choose_linux_keyring
    @returns        whatever the backend-specific getter returns, or None for
                    the basic-text backend
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    assert False, f'Unknown keyring {keyring}'
836
837
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from the macOS keychain.

    Runs `security find-generic-password` and returns its stdout with one
    trailing newline removed.

    Returns None (after logging a warning) when the command exits with a
    non-zero status or when running it raises an exception.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        proc = Popen(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            # stderr is discarded, so without this check a failed lookup would
            # be returned as an empty "password" and used as if it were real
            logger.warning(f'find-generic-password failed with return code {proc.returncode}')
            return None
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
855
856
def _get_windows_v10_key(browser_root, logger):
    """Load and decrypt the key stored in the browser's 'Local State' file.

    The most recently used 'Local State' file below browser_root is parsed as
    JSON, os_crypt.encrypted_key is base64-decoded, its b'DPAPI' prefix is
    removed and the remainder is handed to _decrypt_windows_dpapi.

    Returns None (after logging an error) if the file cannot be found, the
    key entry is missing, or the decoded key lacks the expected prefix.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    wrapped_key = compat_b64decode(encoded_key)
    dpapi_marker = b'DPAPI'
    if wrapped_key.startswith(dpapi_marker):
        return _decrypt_windows_dpapi(wrapped_key[len(dpapi_marker):], logger)

    logger.error('invalid key')
    return None
876
877
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of key_length bytes with PBKDF2 using HMAC-SHA1."""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
880
881
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """AES-CBC decrypt ciphertext, strip the PKCS#7 padding and decode the result as text.

    The initialization vector defaults to 16 space bytes.

    Returns the decoded string, or None (after a once-only warning) if the
    decrypted bytes cannot be decoded.
    """
    decrypted = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    unpadded = unpad_pkcs7(decrypted)
    try:
        text = unpadded.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        text = None
    return text
889
890
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """AES-GCM decrypt and verify ciphertext, then decode the result as text.

    Returns the decoded string, or None (after a once-only warning) if the
    decrypt-and-verify step raises ValueError or the decrypted bytes cannot be
    decoded.
    """
    # The two steps are guarded separately on purpose: UnicodeDecodeError is a
    # subclass of ValueError, so a shared handler could not tell them apart.
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        text = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        text = None
    return text
903
904
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt ciphertext by calling CryptUnprotectData from crypt32 via ctypes.

    Returns the decrypted bytes, or None (after a once-only warning) when the
    call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    # NOTE(review): create_string_buffer() appends a NUL byte when given bytes, so
    # the size passed below is presumably len(ciphertext) + 1 - confirm this is intended
    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if succeeded:
        # copy the output out before the buffer behind pbData is released
        plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
        ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
        return plaintext

    logger.warning('failed to decrypt with DPAPI', only_once=True)
    return None
935
936
937def _config_home():
938 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
939
940
941def _open_database_copy(database_path, tmpdir):
942 # cannot open sqlite databases if they are already in use (e.g. by the browser)
943 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
944 shutil.copy(database_path, database_copy_path)
945 conn = sqlite3.connect(database_copy_path)
946 return conn.cursor()
947
948
949def _get_column_names(cursor, table_name):
86e5f3ed 950 table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
0f06bcd7 951 return [row[1].decode() for row in table_info]
982ee69a
MB
952
953
def _find_most_recently_used_file(root, filename, logger):
    """Return the path of the most recently modified file called filename below root.

    Progress is reported through the progress bar created for logger.
    Returns None when no file with that name exists.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched = 0
    matches = []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _subdirs, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry == filename:
                    matches.append(os.path.join(directory, entry))

    if not matches:
        return None
    return max(matches, key=lambda candidate: os.lstat(candidate).st_mtime)
965
966
def _merge_cookie_jars(jars):
    """Combine several cookie jars into one new YoutubeDLCookieJar.

    Cookies are set in iteration order. The merged jar's filename is that of
    the last input jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source in jars:
        for cookie in source:
            merged.set_cookie(cookie)
        if source.filename is None:
            continue
        merged.filename = source.filename
    return merged
975
976
977def _is_path(value):
978 return os.path.sep in value
979
980
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """Validate a browser specification and normalise its profile.

    @returns  the tuple (browser_name, profile, keyring); a profile that looks
              like a path has its leading '~' expanded
    @raises ValueError  if browser_name is not a supported browser or keyring
                        is neither None nor a supported keyring
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    accepted_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in accepted_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        if _is_path(profile):
            profile = os.path.expanduser(profile)
    return browser_name, profile, keyring