]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
Add option `--download-sections` to download video partially
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
2e4585da 10import time
982ee69a 11from datetime import datetime, timedelta, timezone
f59f5ef8 12from enum import Enum, auto
982ee69a
MB
13from hashlib import pbkdf2_hmac
14
1d3586d0 15from .aes import (
16 aes_cbc_decrypt_bytes,
17 aes_gcm_decrypt_and_verify_bytes,
18 unpad_pkcs7,
19)
f8271158 20from .compat import compat_b64decode, compat_cookiejar_Cookie
9b8ee23b 21from .dependencies import (
22 _SECRETSTORAGE_UNAVAILABLE_REASON,
23 secretstorage,
24 sqlite3,
25)
97ec5bc5 26from .minicurses import MultilinePrinter, QuietMultilinePrinter
f8271158 27from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
982ee69a 28
982ee69a
MB
# Browsers routed through the Chromium extraction path by extract_cookies_from_browser
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name that extract_cookies_from_browser knows how to handle
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS.union({'firefox', 'safari'})
31
32
class YDLLogger:
    """Forward cookie-extraction log messages to a ``ydl`` object.

    Every logging method is a no-op when no ``ydl`` object was supplied,
    so a bare ``YDLLogger()`` can be used as a silent logger.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Pass ``message`` to ``ydl.write_debug``."""
        if self._ydl:
            self._ydl.write_debug(message)

    def info(self, message):
        """Show ``message`` via ``ydl.to_screen`` with a ``[Cookies]`` prefix."""
        if self._ydl:
            self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Pass ``message`` and ``only_once`` to ``ydl.report_warning``."""
        if self._ydl:
            self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Pass ``message`` to ``ydl.report_error``."""
        if self._ydl:
            self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        """Printer that drops updates arriving within ``_DELAY`` seconds of the last one."""
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            if time.time() - self._timer > self._DELAY:
                self.print_at_line(f'[Cookies] {message}', 0)
                self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        Returns None when no progress output should be shown.
        """
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
            return
        file = self._ydl._out_files.error
        try:
            if not file.isatty():
                return
        except Exception:
            # Any ordinary failure of isatty() means "not a terminal"; unlike
            # BaseException this lets KeyboardInterrupt/SystemExit propagate
            return
        return self.ProgressBar(file, preserve_output=False)
97ec5bc5 73
74
75def _create_progress_bar(logger):
76 if hasattr(logger, 'progress_bar'):
77 printer = logger.progress_bar()
78 if printer:
79 return printer
80 printer = QuietMultilinePrinter()
81 printer.print = lambda _: None
82 return printer
83
982ee69a
MB
84
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file            cookie file (path or other object accepted by
                                  YoutubeDLCookieJar), or None
    @param browser_specification  arguments for _parse_browser_specification, or None
    @param ydl                    object handed to YDLLogger for reporting
    @returns                      the result of merging all collected jars
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        is_filename = YoutubeDLCookieJar.is_path(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # A path that is not readable is still added, just without loading
        should_load = not is_filename or os.access(cookie_file, os.R_OK)
        if should_load:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
102
103
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch cookie extraction to the extractor for ``browser_name``.

    ``keyring`` is only forwarded to the Chromium-based extractor.
    Raises ValueError for a browser name that is not handled.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
113
114
def _extract_firefox_cookies(profile, logger):
    """Read every row of ``moz_cookies`` from a firefox cookie database into a jar.

    ``profile`` may be None (search the firefox browser directory), a path
    (searched directly) or a name (joined onto the firefox browser directory).
    Returns an empty jar when sqlite3 is unavailable; raises FileNotFoundError
    when no ``cookies.sqlite`` is found.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is not None and not _is_path(profile):
        search_root = os.path.join(_firefox_browser_dir(), profile)
    elif profile is None:
        search_root = _firefox_browser_dir()
    else:
        search_root = profile

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # The query runs against a copy placed in the temporary directory
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, row in enumerate(table):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    jar.set_cookie(compat_cookiejar_Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
156
157
158def _firefox_browser_dir():
159 if sys.platform in ('linux', 'linux2'):
160 return os.path.expanduser('~/.mozilla/firefox')
161 elif sys.platform == 'win32':
19a03940 162 return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
982ee69a
MB
163 elif sys.platform == 'darwin':
164 return os.path.expanduser('~/Library/Application Support/Firefox')
165 else:
86e5f3ed 166 raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a
MB
167
168
169def _get_chromium_based_browser_settings(browser_name):
170 # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
171 if sys.platform in ('linux', 'linux2'):
172 config = _config_home()
173 browser_dir = {
174 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
175 'chrome': os.path.join(config, 'google-chrome'),
176 'chromium': os.path.join(config, 'chromium'),
177 'edge': os.path.join(config, 'microsoft-edge'),
178 'opera': os.path.join(config, 'opera'),
179 'vivaldi': os.path.join(config, 'vivaldi'),
180 }[browser_name]
181
182 elif sys.platform == 'win32':
183 appdata_local = os.path.expandvars('%LOCALAPPDATA%')
184 appdata_roaming = os.path.expandvars('%APPDATA%')
185 browser_dir = {
19a03940 186 'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
187 'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'),
188 'chromium': os.path.join(appdata_local, R'Chromium\User Data'),
189 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
190 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
191 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
982ee69a
MB
192 }[browser_name]
193
194 elif sys.platform == 'darwin':
195 appdata = os.path.expanduser('~/Library/Application Support')
196 browser_dir = {
197 'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'),
198 'chrome': os.path.join(appdata, 'Google/Chrome'),
199 'chromium': os.path.join(appdata, 'Chromium'),
200 'edge': os.path.join(appdata, 'Microsoft Edge'),
201 'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
202 'vivaldi': os.path.join(appdata, 'Vivaldi'),
203 }[browser_name]
204
205 else:
86e5f3ed 206 raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a
MB
207
208 # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
209 # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
210 keyring_name = {
211 'brave': 'Brave',
212 'chrome': 'Chrome',
213 'chromium': 'Chromium',
29b208f6 214 'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
982ee69a
MB
215 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
216 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
217 }[browser_name]
218
219 browsers_without_profiles = {'opera'}
220
221 return {
222 'browser_dir': browser_dir,
223 'keyring_name': keyring_name,
224 'supports_profiles': browser_name not in browsers_without_profiles
225 }
226
227
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt the cookies of a Chromium-based browser into a jar.

    ``profile`` may be None, a path or a profile name. Returns an empty jar
    when sqlite3 is unavailable; raises FileNotFoundError when no ``Cookies``
    database is found. Cookies that cannot be decrypted are skipped and counted.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)
    supports_profiles = config['supports_profiles']

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # The decryptor below is given browser_dir, so it must follow the explicit path
        config['browser_dir'] = os.path.dirname(profile) if supports_profiles else profile
    elif supports_profiles:
        search_root = os.path.join(config['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = config['browser_dir']

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Text columns are fetched as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, row in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
292
293
97ec5bc5 294def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
0f06bcd7 295 host_key = host_key.decode()
296 name = name.decode()
297 value = value.decode()
298 path = path.decode()
97ec5bc5 299 is_encrypted = not value and encrypted_value
300
301 if is_encrypted:
302 value = decryptor.decrypt(encrypted_value)
303 if value is None:
304 return is_encrypted, None
305
306 return is_encrypted, compat_cookiejar_Cookie(
307 version=0, name=name, value=value, port=None, port_specified=False,
308 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
309 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
310 comment=None, comment_url=None, rest={})
311
312
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class of the per-platform chrome cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Per-version tally of processed cookies; each subclass sets its own dict in __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one ``encrypted_value``; subclasses must override this."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 343
982ee69a 344
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor matching the current platform.

    ``keyring`` is only used on linux and ``browser_root`` only on win32.
    Raises NotImplementedError on any other platform.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}')
982ee69a
MB
354
355
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypt v10 (fixed key) and v11 (keyring key) chrome cookies on linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 always uses the key derived from this fixed password
        self._v10_key = self.derive_key(b'peanuts')
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None when it cannot be decrypted."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        # Any other prefix is only counted
        self._cookie_counts['other'] += 1
        return None
388
389
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypt v10 chrome cookies on mac; other values are passed through unchanged."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None when no v10 key is available."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
420
421
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypt chrome cookies on windows: v10 via AES-GCM, everything else via DPAPI."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None when it cannot be decrypted."""
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the version prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): assumes the DPAPI helper may signal failure with None - confirm
            # against its definition. Without this guard such a failure would raise
            # AttributeError instead of reporting "could not decrypt" via None.
            if plaintext is None:
                return None
            return plaintext.decode()
982ee69a
MB
457
458
459def _extract_safari_cookies(profile, logger):
460 if profile is not None:
461 logger.error('safari does not support profiles')
462 if sys.platform != 'darwin':
86e5f3ed 463 raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a
MB
464
465 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
466
467 if not os.path.isfile(cookies_path):
1f7db853
MP
468 logger.debug('Trying secondary cookie location')
469 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
470 if not os.path.isfile(cookies_path):
471 raise FileNotFoundError('could not find safari cookies database')
982ee69a
MB
472
473 with open(cookies_path, 'rb') as f:
474 cookies_data = f.read()
475
476 jar = parse_safari_cookies(cookies_data, logger=logger)
86e5f3ed 477 logger.info(f'Extracted {len(jar)} cookies from safari')
982ee69a
MB
478 return jar
479
480
class ParserError(Exception):
    """Raised by DataParser when the data cannot be read as requested."""
483
484
class DataParser:
    """Sequential reader over a bytes buffer.

    ``cursor`` is the offset of the next unread byte. All read methods
    advance it and raise ParserError when the request cannot be satisfied.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and raise ParserError unless they match."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless ``big_endian``)."""
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little-endian unless ``big_endian``)."""
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        """Read up to the next NUL byte and return the decoded text without the NUL.

        The cursor ends up just past the terminator. Raises ParserError when
        there is no terminator, and UnicodeDecodeError when decoding fails.
        """
        # Locate the terminator in one call instead of reading byte by byte
        terminator = self._data.find(b'\x00', self.cursor)
        if terminator < 0:
            # Same end state as byte-wise reading: everything consumed, then failure
            self.cursor = len(self._data)
            raise ParserError('reached end of input')
        raw = self._data[self.cursor:terminator]
        # Advance before decoding so the cursor is past the string even if decoding fails
        self.cursor = terminator + 1
        return raw.decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes, logging them at debug level; 0 is a no-op."""
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor is at ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the buffer."""
        self.skip_to(len(self._data), description)
534
535
536def _mac_absolute_time_to_posix(timestamp):
537 return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
538
539
def _parse_safari_cookies_header(data, logger):
    """Parse the database header.

    @returns  (page_sizes, offset of the first byte after the header)
    """
    p = DataParser(data, logger)
    p.expect_bytes(b'cook', 'database signature')
    number_of_pages = p.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(number_of_pages):
        page_sizes.append(p.read_uint(big_endian=True))
    return page_sizes, p.cursor
546
547
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of cookie records and add its cookies to ``jar``."""
    p = DataParser(data, logger)
    p.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = p.read_uint()
    record_offsets = []
    for _ in range(number_of_cookies):
        record_offsets.append(p.read_uint())
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    p.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for i, record_offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
            p.skip_to(record_offset, 'space between records')
            # The record is parsed from its own slice; the page cursor is then moved past it
            record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
            p.read_bytes(record_length)
    p.skip_to_end('space in between pages')
566
567
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of ``data`` and add it to ``jar``.

    A record whose strings fail to decode is skipped with a warning.

    @returns  the record size read from the record itself
    """
    p = DataParser(data, logger)
    record_size = p.read_uint()
    p.skip(4, 'unknown record field 1')
    flags = p.read_uint()
    is_secure = bool(flags & 0x0001)
    p.skip(4, 'unknown record field 2')
    domain_offset = p.read_uint()
    name_offset = p.read_uint()
    path_offset = p.read_uint()
    value_offset = p.read_uint()
    p.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(p.read_double())
    # The creation date is not used, but it still has to be read to advance the cursor
    _creation_date = _mac_absolute_time_to_posix(p.read_double())  # noqa: F841

    try:
        strings = []
        for string_offset in (domain_offset, name_offset, path_offset, value_offset):
            p.skip_to(string_offset)
            strings.append(p.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    p.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
608
609
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari binary cookies file into ``jar``
    (a new YoutubeDLCookieJar when ``jar`` is None) and return the jar.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for page_size in page_sizes:
        page = body.read_bytes(page_size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
625
626
f59f5ef8
MB
627class _LinuxDesktopEnvironment(Enum):
628 """
629 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
630 DesktopEnvironment
631 """
632 OTHER = auto()
633 CINNAMON = auto()
634 GNOME = auto()
635 KDE = auto()
636 PANTHEON = auto()
637 UNITY = auto()
638 XFCE = auto()
982ee69a
MB
639
640
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends from which the cookie encryption password can be read.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()


# Names of all keyring backends, in definition order
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
652
653
def _get_linux_desktop_environment(env):
    """
    Work out the desktop environment from the mapping of environment variables ``env``.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)
    if xdg_current_desktop is not None:
        # Only the first entry of a colon-separated list is considered
        xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()

    if xdg_current_desktop == 'Unity':
        is_gnome_fallback = desktop_session is not None and 'gnome-fallback' in desktop_session
        return _LinuxDesktopEnvironment.GNOME if is_gnome_fallback else _LinuxDesktopEnvironment.UNITY

    environments_by_xdg_name = {
        'GNOME': _LinuxDesktopEnvironment.GNOME,
        'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
        'KDE': _LinuxDesktopEnvironment.KDE,
        'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
        'XFCE': _LinuxDesktopEnvironment.XFCE,
    }
    if xdg_current_desktop in environments_by_xdg_name:
        return environments_by_xdg_name[xdg_current_desktop]

    if desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE
    elif 'GNOME_DESKTOP_SESSION_ID' in env:
        # These two variables are only consulted when DESKTOP_SESSION is unset
        return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
692
693
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the detected desktop environment:
    KDE uses KWALLET, OTHER uses BASICTEXT, everything else GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
708
709
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Falls back to 'kdewallet' when the dbus query fails.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet'
    ]
    try:
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.decode().strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
739
740
741def _get_kwallet_password(browser_keyring_name, logger):
742 logger.debug('using kwallet-query to obtain password from kwallet')
743
744 if shutil.which('kwallet-query') is None:
745 logger.error('kwallet-query command not found. KWallet and kwallet-query '
746 'must be installed to read from KWallet. kwallet-query should be'
747 'included in the kwallet package for your distribution')
748 return b''
749
750 network_wallet = _get_kwallet_network_wallet(logger)
751
752 try:
753 proc = Popen([
754 'kwallet-query',
86e5f3ed 755 '--read-password', f'{browser_keyring_name} Safe Storage',
756 '--folder', f'{browser_keyring_name} Keys',
f59f5ef8
MB
757 network_wallet
758 ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
759
760 stdout, stderr = proc.communicate_or_kill()
761 if proc.returncode != 0:
19a03940 762 logger.error(f'kwallet-query failed with return code {proc.returncode}. Please consult '
763 'the kwallet-query man page for details')
f59f5ef8
MB
764 return b''
765 else:
766 if stdout.lower().startswith(b'failed to read'):
767 logger.debug('failed to read password from kwallet. Using empty string instead')
768 # this sometimes occurs in KDE because chrome does not check hasEntry and instead
769 # just tries to read the value (which kwallet returns "") whereas kwallet-query
770 # checks hasEntry. To verify this:
771 # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
772 # while starting chrome.
773 # this may be a bug as the intended behaviour is to generate a random password and store
774 # it, but that doesn't matter here.
775 return b''
776 else:
777 logger.debug('password found')
778 if stdout[-1:] == b'\n':
779 stdout = stdout[:-1]
780 return stdout
a44ca5a4 781 except Exception as e:
782 logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
f59f5ef8
MB
783 return b''
784
785
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Read the '<browser_keyring_name> Safe Storage' secret via secretstorage.

    @returns  the secret of the first item with that label, or b'' when
              secretstorage is unavailable or no item matches
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        wanted_label = f'{browser_keyring_name} Safe Storage'
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        # Reached only when no item carried the wanted label
        logger.error('failed to read from keyring')
        return b''
802
803
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Obtain the cookie encryption password from the chosen linux keyring.

    @param keyring  name of a _LinuxKeyring member, or a falsy value to auto-detect
    @returns        the password, or None when BASICTEXT is chosen
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    # Raised explicitly because an `assert` statement is removed under `python -O`,
    # which would turn an unhandled keyring into a silent `return None`
    raise AssertionError(f'Unknown keyring {keyring}')
822
823
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Ask the `security` command line tool for the browser's
    "<browser_keyring_name> Safe Storage" password.

    Returns the tool's stdout with one trailing newline removed, or None
    (after logging a warning) if running the tool raised an exception.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        process = Popen(
            [
                'security', 'find-generic-password',
                '-w',  # write password to stdout
                '-a', browser_keyring_name,  # match 'account'
                '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
            ],
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        output, _ = process.communicate_or_kill()
        # drop the single newline terminating the printed password
        return output[:-1] if output[-1:] == b'\n' else output
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
841
842
def _get_windows_v10_key(browser_root, logger):
    """
    Obtain the key used for v10 cookies from the browser's "Local State" file.

    The most recently used "Local State" file below `browser_root` is parsed
    as JSON, its os_crypt.encrypted_key entry is base64 decoded, the leading
    b'DPAPI' marker is stripped and the remainder is decrypted with DPAPI.

    Returns None (after logging an error) if the file, the key entry or the
    marker is missing; otherwise whatever _decrypt_windows_dpapi returns.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    wrapped_key = compat_b64decode(encoded_key)
    marker = b'DPAPI'
    if wrapped_key.startswith(marker):
        return _decrypt_windows_dpapi(wrapped_key[len(marker):], logger)

    logger.error('invalid key')
    return None
862
863
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a `key_length` byte key from `password` and `salt` using PBKDF2-HMAC-SHA1"""
    return pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
866
867
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt `ciphertext` with AES-CBC, strip the PKCS#7 padding and decode
    the result as text.

    Returns the decoded string, or None (after a one-time warning) when the
    decrypted bytes are not valid UTF-8.
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    decrypted = unpad_pkcs7(padded)
    try:
        return decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
875
876
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and authenticate `ciphertext` with AES-GCM, then decode it as text.

    Returns the decoded string, or None (after a one-time warning) when the
    decryption helper raises ValueError or the plaintext is not valid UTF-8.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    # decoding is handled separately so that a UnicodeDecodeError (a ValueError
    # subclass) is not reported as a failed MAC check
    try:
        text = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
889
890
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling CryptUnprotectData from crypt32.

    Returns the decrypted bytes, or None (after a one-time warning) when
    the call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    encrypted = ctypes.create_string_buffer(ciphertext)
    source_blob = DATA_BLOB(ctypes.sizeof(encrypted), encrypted)
    target_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(source_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(target_blob)  # pDataOut
    )
    if succeeded:
        # copy the output into a Python bytes object before the buffer is freed
        decrypted = ctypes.string_at(target_blob.pbData, target_blob.cbData)
        ctypes.windll.kernel32.LocalFree(target_blob.pbData)
        return decrypted

    logger.warning('failed to decrypt with DPAPI', only_once=True)
    return None
921
922
def _config_home():
    """
    Return the user's configuration directory.

    Uses $XDG_CONFIG_HOME when it is set to a non-empty value and falls
    back to ~/.config otherwise. An empty value is treated like an unset
    one, as the XDG Base Directory Specification requires; returning ''
    would make every path derived from it relative to the working directory.
    """
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
925
926
def _open_database_copy(database_path, tmpdir):
    """
    Copy the database at `database_path` to "temporary.sqlite" inside
    `tmpdir` and return a cursor on a new connection to that copy.
    """
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
933
934
def _get_column_names(cursor, table_name):
    """
    Return the column names of `table_name` as a list of str.

    `table_name` is interpolated directly into the PRAGMA statement, so it
    must be a trusted identifier.

    The names are decoded when the connection yields bytes and passed
    through unchanged when it yields str, so the function works whatever
    text_factory the connection uses.
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # the second field of each table_info row is the column name
    return [row[1].decode() if isinstance(row[1], bytes) else row[1] for row in table_info]
982ee69a
MB
938
939
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk `root` and return the path of the file named `filename` with the
    newest modification time, or None if no such file exists.

    Progress is reported through the progress bar created for `logger`.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched, candidates = 0, []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, names in os.walk(root):
            for name in names:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if name != filename:
                    continue
                candidates.append(os.path.join(directory, name))

    def modification_time(path):
        return os.lstat(path).st_mtime

    return max(candidates, key=modification_time, default=None)
951
952
def _merge_cookie_jars(jars):
    """
    Combine the cookies of all `jars` into one new YoutubeDLCookieJar.

    Jars are processed in order; the result's filename is taken from the
    last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source in jars:
        for cookie in source:
            merged.set_cookie(cookie)
        if source.filename is None:
            continue
        merged.filename = source.filename
    return merged
961
962
def _is_path(value):
    """Return True if `value` contains the platform's path separator"""
    separator = os.path.sep
    return separator in value
965
966
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """
    Validate a browser specification and normalise its profile.

    @returns    (browser_name, profile, keyring); a profile that looks like
                a path has a leading "~" expanded
    @raises     ValueError if the browser or the keyring is unsupported
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    accepted_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in accepted_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    # profile names are passed through untouched; only paths are expanded
    looks_like_path = profile is not None and _is_path(profile)
    resolved_profile = os.path.expanduser(profile) if looks_like_path else profile
    return browser_name, resolved_profile, keyring