]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[cleanup] Sort imports
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
10from datetime import datetime, timedelta, timezone
f59f5ef8 11from enum import Enum, auto
982ee69a
MB
12from hashlib import pbkdf2_hmac
13
1d3586d0 14from .aes import (
15 aes_cbc_decrypt_bytes,
16 aes_gcm_decrypt_and_verify_bytes,
17 unpad_pkcs7,
18)
f8271158 19from .compat import compat_b64decode, compat_cookiejar_Cookie
97ec5bc5 20from .minicurses import MultilinePrinter, QuietMultilinePrinter
f8271158 21from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
982ee69a 22
767b02a9
MB
# Although sqlite3 is part of the standard library, it is possible to compile python
# without sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
try:
    import sqlite3
except ImportError:
    SQLITE_AVAILABLE = False
else:
    SQLITE_AVAILABLE = True


# secretstorage is optional; when it cannot be used, record a human-readable
# reason that is appended to the error logged by the keyring code.
try:
    import secretstorage
except ImportError:
    SECRETSTORAGE_AVAILABLE = False
    SECRETSTORAGE_UNAVAILABLE_REASON = (
        'as the `secretstorage` module is not installed. '
        'Please install by running `python3 -m pip install secretstorage`.')
except Exception as _err:
    # The module exists but failed while importing
    SECRETSTORAGE_AVAILABLE = False
    SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
else:
    SECRETSTORAGE_AVAILABLE = True
982ee69a
MB
43
44
# Browsers handled by the shared Chromium extraction code
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name that extract_cookies_from_browser knows how to handle
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS.union({'firefox', 'safari'})
47
48
class YDLLogger:
    """Logger that forwards cookie-extraction messages to a YoutubeDL-like object.

    When no ``ydl`` object is supplied every method is a silent no-op, so the
    class can be used as a safe default logger.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward ``message`` to ``ydl.write_debug``."""
        if self._ydl:
            self._ydl.write_debug(message)

    def info(self, message):
        """Print ``message`` via ``ydl.to_screen`` with a ``[Cookies]`` prefix."""
        if self._ydl:
            self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward ``message`` (and the ``only_once`` flag) to ``ydl.report_warning``."""
        if self._ydl:
            self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward ``message`` to ``ydl.report_error``."""
        if self._ydl:
            self._ydl.report_error(message)

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        Returns None when no progress output should be shown.
        """
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
            return None
        file = self._ydl._out_files['error']
        try:
            if not file.isatty():
                return None
        except Exception:
            # A stream that cannot answer isatty() is treated as "not a terminal".
            # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate.
            return None

        printer = MultilinePrinter(file, preserve_output=False)
        # Give the printer a simple one-argument print() that always rewrites line 0
        printer.print = lambda message: printer.print_at_line(f'[Cookies] {message}', 0)
        return printer
84
85
def _create_progress_bar(logger):
    """Return the logger's progress printer, or a silent stand-in.

    The logger's ``progress_bar()`` result is used when the logger has such a
    method and it returns something truthy; otherwise a QuietMultilinePrinter
    whose ``print`` does nothing is returned.
    """
    printer = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not printer:
        printer = QuietMultilinePrinter()
        printer.print = lambda _: None
    return printer
94
982ee69a
MB
95
def load_cookies(cookie_file, browser_specification, ydl):
    """Combine browser cookies and/or a cookie file into one jar.

    @param cookie_file            path of a cookie file, or None
    @param browser_specification  arguments for _parse_browser_specification, or None
    @param ydl                    object the YDLLogger reports to
    @returns                      the result of merging every jar that was collected
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        cookie_file = expand_path(cookie_file)
        file_jar = YoutubeDLCookieJar(cookie_file)
        # The jar is kept even when the file is not readable; it is just not loaded
        if os.access(cookie_file, os.R_OK):
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
110
111
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch to the cookie extractor for ``browser_name``.

    ``keyring`` is only passed on to the Chromium-based extractor.
    Raises ValueError for a browser name that is not handled.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
121
122
def _extract_firefox_cookies(profile, logger):
    """Load all cookies from a Firefox ``cookies.sqlite`` database.

    ``profile`` may be None (search the whole Firefox directory), a path, or a
    name that is joined onto the Firefox directory.

    Returns a YoutubeDLCookieJar (empty when sqlite3 is unavailable).
    Raises FileNotFoundError when no cookie database is found.
    """
    logger.info('Extracting cookies from firefox')
    if not SQLITE_AVAILABLE:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is not None and _is_path(profile):
        search_root = profile
    else:
        firefox_dir = _firefox_browser_dir()
        search_root = firefox_dir if profile is None else os.path.join(firefox_dir, profile)

    database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # Queries run against a copy of the database placed in the temporary directory
            cursor = _open_database_copy(database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    jar.set_cookie(compat_cookiejar_Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
164
165
def _firefox_browser_dir():
    """Return the per-platform Firefox data directory.

    Raises ValueError on platforms other than Linux, Windows and macOS.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return os.path.expanduser('~/.mozilla/firefox')
    if platform == 'win32':
        return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles')
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a
MB
175
176
def _get_chromium_based_browser_settings(browser_name):
    """Return the platform-specific settings of a Chromium-based browser.

    The result is a dict with the keys ``browser_dir``, ``keyring_name`` and
    ``supports_profiles``.

    Raises ValueError on an unsupported platform and KeyError for a browser
    name that is not in the tables below.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        config = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]
        browser_dir = os.path.join(config, relative_dir)

    elif platform == 'win32':
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        # Opera is the only browser here located under the roaming directory
        base_dir, relative_dir = {
            'brave': (appdata_local, r'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (appdata_local, r'Google\Chrome\User Data'),
            'chromium': (appdata_local, r'Chromium\User Data'),
            'edge': (appdata_local, r'Microsoft\Edge\User Data'),
            'opera': (appdata_roaming, r'Opera Software\Opera Stable'),
            'vivaldi': (appdata_local, r'Vivaldi\User Data'),
        }[browser_name]
        browser_dir = os.path.join(base_dir, relative_dir)

    elif platform == 'darwin':
        appdata = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]
        browser_dir = os.path.join(appdata, relative_dir)

    else:
        raise ValueError(f'unsupported platform: {sys.platform}')

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }[browser_name]

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        # opera is the only listed browser without profiles
        'supports_profiles': browser_name != 'opera',
    }
234
235
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Load (and decrypt where needed) the cookies of a Chromium-based browser.

    ``profile`` may be None, a path, or a profile name; a profile name is only
    honoured for browsers whose settings report ``supports_profiles``.

    Returns a YoutubeDLCookieJar (empty when sqlite3 is unavailable).
    Raises FileNotFoundError when no ``Cookies`` database is found.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not SQLITE_AVAILABLE:
        logger.warning(
            f'Cannot extract cookies from {browser_name} without sqlite3 support. '
            'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    browser_dir = settings['browser_dir']
    supports_profiles = settings['supports_profiles']

    if profile is None:
        search_root = browser_dir
    elif _is_path(profile):
        search_root = profile
        # With profile support the browser root is taken to be the parent of the profile path
        browser_dir = os.path.dirname(profile) if supports_profiles else profile
    elif supports_profiles:
        search_root = os.path.join(browser_dir, profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = browser_dir

    database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    decryptor = get_cookie_decryptor(browser_dir, settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            # Text columns are fetched as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            # The "secure" column is named differently depending on the database schema
            secure_column = 'is_secure' if 'is_secure' in _get_column_names(cursor, 'cookies') else 'secure'
            cursor.execute(
                'SELECT host_key, name, value, encrypted_value, path, '
                f'expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed = 0
            unencrypted = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed += 1
                        continue
                    if not is_encrypted:
                        unencrypted += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed} could not be decrypted)' if failed > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor.cookie_counts.copy()
            counts['unencrypted'] = unencrypted
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
301
302
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the Chromium ``cookies`` table into a cookie object.

    @param decryptor        object whose ``decrypt(encrypted_value)`` returns the
                            plaintext value, or None on failure
    @param host_key, name, value, path
                            UTF-8 encoded bytes taken from the database row
    @param encrypted_value  encrypted cookie value; only used when ``value`` is empty
    @returns                tuple ``(is_encrypted, cookie)``: ``is_encrypted`` is a bool,
                            ``cookie`` is None when decryption failed
    """
    host_key = host_key.decode('utf-8')
    name = name.decode('utf-8')
    value = value.decode('utf-8')
    path = path.decode('utf-8')
    # A cookie counts as encrypted only when there is no plaintext value but an
    # encrypted one. Normalised to a real bool so the flag never leaks the raw blob.
    is_encrypted = bool(encrypted_value) and not value

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    return is_encrypted, compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})
320
321
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Interface of the per-platform Chromium cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    @property
    def cookie_counts(self):
        """Per-version counters of the cookies seen; subclasses must provide this."""
        raise NotImplementedError

    def decrypt(self, encrypted_value):
        """Decrypt one encrypted cookie value; subclasses must provide this."""
        raise NotImplementedError
354
982ee69a 355
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor for the current platform.

    ``browser_root`` is only used on Windows, ``browser_keyring_name`` on Linux
    and macOS, and ``keyring`` on Linux.
    Raises NotImplementedError on any other platform.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}')
366
367
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts Chromium cookies on Linux (``v10`` and ``v11`` prefixed values)."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 cookies use a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        # v11 cookies use a key derived from the keyring password, when one is available
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = dict.fromkeys(('v10', 'v11', 'other'), 0)

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None when it cannot be decrypted."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version not in (b'v10', b'v11'):
            self._cookie_counts['other'] += 1
            return None

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        self._cookie_counts['v11'] += 1
        if self._v11_key is None:
            self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
404
405
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts Chromium cookies on macOS (``v10`` prefixed values)."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = dict.fromkeys(('v10', 'other'), 0)

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted value; values without the v10 prefix are returned unchanged."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
440
441
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts Chromium cookies on Windows (AES-GCM for ``v10``, DPAPI otherwise)."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = dict.fromkeys(('v10', 'other'), 0)

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None when no v10 key is available."""
        version, payload = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8')

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
        #   kNonceLength
        nonce_length = 96 // 8
        # boringssl
        #   EVP_AEAD_AES_GCM_TAG_LEN
        authentication_tag_length = 16

        # payload layout: nonce | ciphertext | authentication tag
        nonce = payload[:nonce_length]
        ciphertext = payload[nonce_length:-authentication_tag_length]
        authentication_tag = payload[-authentication_tag_length:]

        return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
481
482
def _extract_safari_cookies(profile, logger):
    """Load the Safari cookies of the current user (macOS only).

    A non-None ``profile`` is reported as an error but otherwise ignored.
    Raises ValueError on other platforms and FileNotFoundError when neither
    known ``Cookies.binarycookies`` location exists.
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    database = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    if not os.path.isfile(database):
        logger.debug('Trying secondary cookie location')
        database = os.path.expanduser(
            '~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(database):
            raise FileNotFoundError('could not find safari cookies database')

    with open(database, 'rb') as database_file:
        raw_cookies = database_file.read()

    jar = parse_safari_cookies(raw_cookies, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
503
504
class ParserError(Exception):
    """Raised by DataParser when the input cannot be read as requested."""
507
508
class DataParser:
    """Sequential reader over a bytes buffer.

    ``cursor`` is the offset of the next unread byte. Reads that are negative
    or run past the end of the buffer raise ParserError.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, end = self.cursor, self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = end
        return self._data[start:end]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and require them to equal ``expected_value``."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless ``big_endian``)."""
        (number,) = struct.unpack('>I' if big_endian else '<I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little endian unless ``big_endian``)."""
        (number,) = struct.unpack('>d' if big_endian else '<d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read bytes up to and including the next NUL and return them decoded as UTF-8."""
        # iter() with a sentinel keeps reading single bytes until the terminator shows up
        chunks = list(iter(lambda: self.read_bytes(1), b'\x00'))
        return b''.join(chunks).decode('utf-8')

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes, logging what was skipped; zero is a no-op."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward until the cursor is at ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the buffer."""
        self.skip_to(len(self._data), description)
559
560
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
563
564
def _parse_safari_cookies_header(data, logger):
    """Parse the header of a Safari cookie database.

    @returns  tuple ``(page_sizes, body_start)``: the list of page sizes and the
              offset in ``data`` directly after the header
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
571
572
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a Safari cookie database and add its cookies to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record parser works on its own slice; advance this parser by the size it reports
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
591
592
def _parse_safari_cookies_record(data, jar, logger):
    """Parse a single cookie record starting at ``data[0]`` and add it to ``jar``.

    A record whose strings are not valid UTF-8 is skipped with a warning.

    @returns  the record size read from the record itself
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # The creation date is read (and converted) but not used
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        strings = []
        for string_offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(string_offset)
            strings.append(parser.read_cstring())
        domain, name, path, value = strings
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
633
634
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the raw bytes of a Safari cookie database into ``jar``.

    A new YoutubeDLCookieJar is created when ``jar`` is None; the jar is returned.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
650
651
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished when choosing a keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    # Explicit values, numbered consecutively from 1
    OTHER = 1
    CINNAMON = 2
    GNOME = 3
    KDE = 4
    PANTHEON = 5
    UNITY = 6
    XFCE = 7
982ee69a
MB
664
665
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends that a password can be read from.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    # Explicit values, numbered consecutively from 1
    KWALLET = 1
    GNOMEKEYRING = 2
    BASICTEXT = 3


# Names accepted for an explicitly requested keyring
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
677
678
def _get_linux_desktop_environment(env):
    """
    Detect the desktop environment from a mapping of environment variables.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment

    The checks are tried in order: XDG_CURRENT_DESKTOP (first ':' separated
    entry only), then DESKTOP_SESSION, then the legacy GNOME_DESKTOP_SESSION_ID /
    KDE_FULL_SESSION variables. The legacy variables are consulted whenever the
    earlier checks did not produce a match, including when DESKTOP_SESSION is
    set to an unrecognised value.

    @param env  mapping of environment variable names to values (e.g. os.environ)
    @returns    a _LinuxDesktopEnvironment member; OTHER when nothing matches
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()

        if xdg_current_desktop == 'Unity':
            # Unity with a gnome-fallback session is treated as GNOME
            if desktop_session is not None and 'gnome-fallback' in desktop_session:
                return _LinuxDesktopEnvironment.GNOME
            return _LinuxDesktopEnvironment.UNITY
        elif xdg_current_desktop == 'GNOME':
            return _LinuxDesktopEnvironment.GNOME
        elif xdg_current_desktop == 'X-Cinnamon':
            return _LinuxDesktopEnvironment.CINNAMON
        elif xdg_current_desktop == 'KDE':
            return _LinuxDesktopEnvironment.KDE
        elif xdg_current_desktop == 'Pantheon':
            return _LinuxDesktopEnvironment.PANTHEON
        elif xdg_current_desktop == 'XFCE':
            return _LinuxDesktopEnvironment.XFCE

    if desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        elif 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE

    # Fall back on some older environment variables. This must not be skipped
    # just because DESKTOP_SESSION holds a value that was not recognised above.
    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
717
718
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the detected desktop environment:
    KDE -> KWALLET, OTHER -> BASICTEXT, anything else -> GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
733
734
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Queried with `dbus-send`; 'kdewallet' is returned when the query fails.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet',
    ]
    try:
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet
        network_wallet = stdout.decode('utf-8').strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
764
765
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from KWallet using `kwallet-query`.

    @param browser_keyring_name  name used to build the entry ("<name> Safe Storage")
                                 and folder ("<name> Keys") that are queried
    @returns  the password as bytes with one trailing newline removed, or b''
              when it cannot be obtained (tool missing, non-zero exit status,
              entry not readable, or any exception while running the tool)
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # NB: every fragment but the last ends in a space so the joined message reads correctly
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        proc = Popen([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.error(f'kwallet-query failed with return code {proc.returncode}. Please consult '
                         'the kwallet-query man page for details')
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this may be a bug as the intended behaviour is to generate a random password and store
            # it, but that doesn't matter here.
            return b''

        logger.debug('password found')
        # Drop the single newline that terminates the output
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
809
810
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" secret from the default secretstorage collection.

    Returns b'' when secretstorage is unavailable or no matching item exists.
    """
    if not SECRETSTORAGE_AVAILABLE:
        logger.error(f'secretstorage not available {SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    wanted_label = f'{browser_keyring_name} Safe Storage'
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        match = next((item for item in collection.get_all_items() if item.get_label() == wanted_label), None)
        if match is not None:
            return match.get_secret()
        logger.error('failed to read from keyring')
        return b''
827
828
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Fetch the browser's keyring password from the selected Linux keyring backend.

    keyring is the name of a _LinuxKeyring member; when it is falsy the backend
    is picked by _choose_linux_keyring. Returns None for the basic text backend.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        chosen = _LinuxKeyring[keyring]
    else:
        chosen = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {chosen.name}')

    if chosen == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if chosen == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if chosen == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {chosen}'
847
848
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the '<browser_keyring_name> Safe Storage' password from the macOS
    keychain using the `security find-generic-password` command.

    Returns the password as bytes with a single trailing newline removed, or
    None if the command exits with a non-zero status or cannot be run.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        proc = Popen(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            # without this check a failed lookup would hand back the (empty) stdout
            # as if it were the password
            logger.warning(f'find-generic-password failed with return code {proc.returncode}')
            return None
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
866
867
def _get_windows_v10_key(browser_root, logger):
    """
    Locate the browser's 'Local State' file below browser_root and decrypt the
    key stored under os_crypt.encrypted_key.

    Returns the result of _decrypt_windows_dpapi, or None when the file or the
    key is missing or the key lacks the 'DPAPI' prefix.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state:
        state = json.load(local_state)

    try:
        encoded_key = state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    key_blob = compat_b64decode(encoded_key)
    dpapi_marker = b'DPAPI'
    if key_blob.startswith(dpapi_marker):
        # strip the marker; the remainder is the DPAPI-protected blob
        return _decrypt_windows_dpapi(key_blob[len(dpapi_marker):], logger)

    logger.error('invalid key')
    return None
887
888
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive key_length bytes from password and salt using PBKDF2 with HMAC-SHA1"""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
891
892
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt ciphertext with AES-CBC, strip the PKCS#7 padding and decode the
    result as UTF-8.

    Returns the decoded string, or None (after a one-time warning) when the
    decrypted bytes are not valid UTF-8.
    """
    decrypted = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    unpadded = unpad_pkcs7(decrypted)
    try:
        text = unpadded.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
900
901
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and authenticate ciphertext with AES-GCM, then decode it as UTF-8.

    Returns the decoded string, or None (after a one-time warning) when the
    decryption raises ValueError or the plaintext is not valid UTF-8.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        failure = 'failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?'
    else:
        # decoding is kept out of the try above: UnicodeDecodeError is a ValueError
        # and must not be reported as a MAC failure
        try:
            return decrypted.decode('utf-8')
        except UnicodeDecodeError:
            failure = 'failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?'

    logger.warning(failure, only_once=True)
    return None
914
915
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt ciphertext with the Windows CryptUnprotectData function.

    Returns the decrypted bytes, or None (after a one-time warning) when the
    call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()

    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before releasing the output buffer
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
946
947
948def _config_home():
949 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
950
951
952def _open_database_copy(database_path, tmpdir):
953 # cannot open sqlite databases if they are already in use (e.g. by the browser)
954 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
955 shutil.copy(database_path, database_copy_path)
956 conn = sqlite3.connect(database_copy_path)
957 return conn.cursor()
958
959
960def _get_column_names(cursor, table_name):
86e5f3ed 961 table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
982ee69a
MB
962 return [row[1].decode('utf-8') for row in table_info]
963
964
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk root looking for files named filename, reporting progress as it goes.

    Returns the match with the greatest modification time, or None if there is
    no match.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched = 0
    matches = []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _subdirs, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry == filename:
                    matches.append(os.path.join(directory, entry))
    if not matches:
        return None
    return max(matches, key=lambda candidate: os.lstat(candidate).st_mtime)
976
977
def _merge_cookie_jars(jars):
    """
    Combine the cookies of all jars into a new YoutubeDLCookieJar.

    The merged jar takes the filename of the last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source in jars:
        for cookie in source:
            merged.set_cookie(cookie)
        if source.filename is None:
            continue
        merged.filename = source.filename
    return merged
986
987
988def _is_path(value):
989 return os.path.sep in value
990
991
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """
    Validate a browser specification and return it as (browser_name, profile, keyring).

    Raises ValueError for a browser not in SUPPORTED_BROWSERS or a keyring that
    is neither None nor in SUPPORTED_KEYRINGS. A profile that looks like a path
    has its leading '~' expanded.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    accepted_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in accepted_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        if _is_path(profile):
            profile = os.path.expanduser(profile)
    return browser_name, profile, keyring