]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[extractor/FranceCulture] Fix extractor (#3874)
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
2e4585da 10import time
982ee69a 11from datetime import datetime, timedelta, timezone
f59f5ef8 12from enum import Enum, auto
982ee69a
MB
13from hashlib import pbkdf2_hmac
14
1d3586d0 15from .aes import (
16 aes_cbc_decrypt_bytes,
17 aes_gcm_decrypt_and_verify_bytes,
18 unpad_pkcs7,
19)
f8271158 20from .compat import compat_b64decode, compat_cookiejar_Cookie
9b8ee23b 21from .dependencies import (
22 _SECRETSTORAGE_UNAVAILABLE_REASON,
23 secretstorage,
24 sqlite3,
25)
97ec5bc5 26from .minicurses import MultilinePrinter, QuietMultilinePrinter
f8271158 27from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
982ee69a 28
982ee69a
MB
# Browsers that are handled by the shared Chromium cookie extraction code path
CHROMIUM_BASED_BROWSERS = {
    'brave',
    'chrome',
    'chromium',
    'edge',
    'opera',
    'vivaldi',
}
# Every browser name that cookie extraction knows how to handle
SUPPORTED_BROWSERS = {'firefox', 'safari', *CHROMIUM_BASED_BROWSERS}
31
32
class YDLLogger:
    """Logger that forwards cookie-extraction messages to a ``ydl`` object.

    Every logging method is a silent no-op when no ``ydl`` object was given.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward ``message`` to ``ydl.write_debug``."""
        if self._ydl:
            self._ydl.write_debug(message)

    def info(self, message):
        """Forward ``message`` to ``ydl.to_screen`` with a ``[Cookies]`` prefix."""
        if self._ydl:
            self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward ``message`` and ``only_once`` to ``ydl.report_warning``."""
        if self._ydl:
            self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward ``message`` to ``ydl.report_error``."""
        if self._ydl:
            self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        # minimum number of seconds between two printed updates, and the time of the last one
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            # rate limit: drop updates that arrive within _DELAY of the previous print
            if time.time() - self._timer > self._DELAY:
                self.print_at_line(f'[Cookies] {message}', 0)
                self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        Returns None when there is no ``ydl`` object, when the ``noprogress``
        or ``logger`` params are set, or when the error stream is not a TTY.
        """
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
            return
        file = self._ydl._out_files.error
        try:
            if not file.isatty():
                return
        except Exception:
            # Only ordinary errors mean "not a usable terminal";
            # KeyboardInterrupt/SystemExit must keep propagating
            return
        return self.ProgressBar(file, preserve_output=False)
97ec5bc5 73
74
def _create_progress_bar(logger):
    """Return the logger's progress bar, or a silent stand-in printer.

    The stand-in is used when the logger has no ``progress_bar`` method or
    when that method returns a falsy value.
    """
    bar = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not bar:
        bar = QuietMultilinePrinter()
        bar.print = lambda _: None
    return bar
83
982ee69a
MB
84
def load_cookies(cookie_file, browser_specification, ydl):
    """Build a single cookie jar from a browser and/or a cookie file.

    @param cookie_file            cookie file (path or other object accepted by
                                  YoutubeDLCookieJar), or None
    @param browser_specification  arguments for _parse_browser_specification, or None
    @param ydl                    object handed to YDLLogger for reporting
    @returns                      the result of merging all collected jars
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        is_path = YoutubeDLCookieJar.is_path(cookie_file)
        if is_path:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # A path that cannot be read is not loaded, but its jar is still merged
        can_load = os.access(cookie_file, os.R_OK) if is_path else True
        if can_load:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
102
103
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Extract the cookies of ``browser_name`` using the matching extractor.

    ``keyring`` is only passed on to the Chromium-based extractor.
    Raises ValueError for a browser name that is not known.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
113
114
def _extract_firefox_cookies(profile, logger):
    """Read all cookies from a Firefox ``cookies.sqlite`` database.

    ``profile`` may be None (search the default browser directory), a path,
    or a name that is joined onto the default browser directory.
    Returns an empty jar when sqlite3 is unavailable and raises
    FileNotFoundError when no cookie database can be found.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is not None and _is_path(profile):
        search_root = profile
    else:
        browser_dir = _firefox_browser_dir()
        search_root = browser_dir if profile is None else os.path.join(browser_dir, profile)

    database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = _open_database_copy(database_path, tmpdir)
        # the connection is closed however the extraction below ends
        with contextlib.closing(cursor.connection):
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    jar.set_cookie(compat_cookiejar_Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
156
157
def _firefox_browser_dir():
    """Return the platform-specific directory that is searched for Firefox data."""
    platform = sys.platform
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    if platform in ('cygwin', 'win32'):
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    return os.path.expanduser('~/.mozilla/firefox')
982ee69a
MB
164
165
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a browser.

    The result is a dict with the keys 'browser_dir', 'keyring_name' and
    'supports_profiles'. An unknown ``browser_name`` raises KeyError.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    is_mac = sys.platform == 'darwin'

    if sys.platform in ('cygwin', 'win32'):
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        base, relative = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
        }[browser_name]

    elif is_mac:
        base = os.path.expanduser('~/Library/Application Support')
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]

    else:
        base = _config_home()
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_names = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if is_mac else 'Chromium',
        'opera': 'Opera' if is_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if is_mac else 'Chrome',
    }

    return {
        'browser_dir': os.path.join(base, relative),
        'keyring_name': keyring_names[browser_name],
        # opera is the only listed browser without profiles
        'supports_profiles': browser_name != 'opera',
    }
220
221
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt all cookies of a Chromium-based browser.

    ``profile`` may be None, a path, or a profile name below the browser
    directory. Returns an empty jar when sqlite3 is unavailable and raises
    FileNotFoundError when no ``Cookies`` database can be found. Cookies that
    cannot be decrypted are counted and left out of the jar.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)
    browser_dir = config['browser_dir']
    supports_profiles = config['supports_profiles']

    if profile is None:
        search_root = browser_dir
    elif _is_path(profile):
        search_root = profile
        # with an explicit profile path the browser root is its parent directory
        browser_dir = os.path.dirname(profile) if supports_profiles else profile
    elif supports_profiles:
        search_root = os.path.join(browser_dir, profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = browser_dir

    database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    decryptor = get_cookie_decryptor(browser_dir, config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = _open_database_copy(database_path, tmpdir)
        # the connection is closed however the extraction below ends
        with contextlib.closing(cursor.connection):
            cursor.connection.text_factory = bytes
            # the "secure" column has two possible names; use whichever exists
            columns = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in columns else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
286
287
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the Chromium ``cookies`` table into a cookie object.

    Returns ``(is_encrypted, cookie)``. ``is_encrypted`` is truthy when the
    row has no plain value but does have an encrypted one; ``cookie`` is None
    when the encrypted value could not be decrypted.
    """
    # text columns arrive as bytes
    host_key, name, value, path = (
        column.decode() for column in (host_key, name, value, path))
    is_encrypted = False if value else encrypted_value

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    cookie = compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
305
306
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class of the platform-specific Chromium cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # number of cookies seen per encryption version; subclasses set their own dict
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt ``encrypted_value``; every subclass has to provide this."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 337
982ee69a 338
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor that matches the current platform.

    ``browser_root`` is only used on Windows and ``keyring`` only on Linux.
    Raises NotImplementedError on any other platform.
    """
    platform = sys.platform
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}')
982ee69a
MB
348
349
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts v10 (fixed key) and v11 (keyring key) cookies on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._v10_key = self.derive_key(b'peanuts')
        keyring_password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        # without a keyring password there is no v11 key
        self._v11_key = self.derive_key(keyring_password) if keyring_password is not None else None
        self._cookie_counts = dict.fromkeys(('v10', 'v11', 'other'), 0)

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None if it cannot be decrypted."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version not in (b'v10', b'v11'):
            self._cookie_counts['other'] += 1
            return None

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        self._cookie_counts['v11'] += 1
        if self._v11_key is None:
            self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
382
383
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts v10 cookies on macOS; other values are passed through as-is."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        keyring_password = _get_mac_keyring_password(browser_keyring_name, logger)
        # without a keyring password there is no v10 key
        self._v10_key = self.derive_key(keyring_password) if keyring_password is not None else None
        self._cookie_counts = dict.fromkeys(('v10', 'other'), 0)

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None if a v10 cookie has no key."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
414
415
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts v10 (AES-GCM) and older (DPAPI) cookies on Windows."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None if it cannot be decrypted."""
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # layout of the payload: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): _decrypt_windows_dpapi is defined outside this block and
            # presumably returns None when DPAPI fails - confirm. Guarding here makes a
            # failed decryption count as an undecryptable cookie instead of raising
            # AttributeError on None.decode()
            return None if plaintext is None else plaintext.decode()
982ee69a
MB
451
452
def _extract_safari_cookies(profile, logger):
    """Read and parse Safari's ``Cookies.binarycookies`` file.

    A non-None ``profile`` is reported as an error but otherwise ignored.
    Raises ValueError on platforms other than macOS and FileNotFoundError
    when the cookie file exists in neither known location.
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    primary_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    if os.path.isfile(primary_path):
        cookies_path = primary_path
    else:
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookies_file:
        raw_cookies = cookies_file.read()

    jar = parse_safari_cookies(raw_cookies, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
473
474
class ParserError(Exception):
    """Raised when binary data cannot be parsed."""
477
478
class DataParser:
    """Sequential reader over a bytes buffer.

    ``cursor`` is the offset of the next unread byte. Every read advances it;
    reads that are negative or go past the end of the data raise ParserError.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        chunk = self._data[start:stop]
        self.cursor = stop
        return chunk

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes; raise ParserError unless they match."""
        actual = self.read_bytes(len(expected_value))
        if actual == expected_value:
            return
        raise ParserError(f'unexpected value: {actual} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless ``big_endian``)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(byte_order + 'I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little endian unless ``big_endian``)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(byte_order + 'd', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read up to and including the next NUL byte; return the decoded text before it."""
        # iter() with a sentinel keeps reading single bytes until the terminator shows up
        characters = iter(lambda: self.read_bytes(1), b'\x00')
        return b''.join(characters).decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes and log what was skipped."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward until the cursor is at ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left of the data."""
        self.skip_to(len(self._data), description)
528
529
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds since 2001-01-01 00:00 UTC into a whole-second POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
532
533
def _parse_safari_cookies_header(data, logger):
    """Parse the file header of a Safari cookie database.

    Returns ``(page_sizes, header_length)`` where ``header_length`` is the
    offset of the first byte after the header.
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
540
541
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a Safari cookie database and add its cookies to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # the record is parsed from its own slice; afterwards the page
            # parser is moved past it by the reported record length
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
560
561
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of ``data`` and add it to ``jar``.

    Returns the record size stored in the record itself. A record whose
    strings are not valid UTF-8 is skipped with a warning.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    # offsets of the four NUL-terminated strings, in the order they are stored
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        texts = []
        for offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(offset)
            texts.append(parser.read_cstring())
        domain, name, path, value = texts
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
602
603
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari cookie database into ``jar``.

    A new YoutubeDLCookieJar is created when ``jar`` is None; the jar that was
    filled is returned.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure are the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
619
620
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments that are told apart when choosing a keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    # OTHER is the fallback for everything that is not recognised
    OTHER = auto()
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
982ee69a
MB
633
634
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends a cookie encryption password can be read from.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()


# names of all keyring backends, in definition order
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
646
647
def _get_linux_desktop_environment(env):
    """
    Work out the desktop environment from the environment mapping ``env``.

    XDG_CURRENT_DESKTOP (first ':' separated entry) is checked first, then
    DESKTOP_SESSION; the GNOME/KDE session variables are only consulted when
    DESKTOP_SESSION is not set.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)
    if xdg_current_desktop is not None:
        xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()

    if xdg_current_desktop == 'Unity':
        is_gnome_fallback = desktop_session is not None and 'gnome-fallback' in desktop_session
        return _LinuxDesktopEnvironment.GNOME if is_gnome_fallback else _LinuxDesktopEnvironment.UNITY

    by_xdg_name = {
        'GNOME': _LinuxDesktopEnvironment.GNOME,
        'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
        'KDE': _LinuxDesktopEnvironment.KDE,
        'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
        'XFCE': _LinuxDesktopEnvironment.XFCE,
    }
    if xdg_current_desktop in by_xdg_name:
        return by_xdg_name[xdg_current_desktop]

    if desktop_session is None:
        if 'GNOME_DESKTOP_SESSION_ID' in env:
            return _LinuxDesktopEnvironment.GNOME
        if 'KDE_FULL_SESSION' in env:
            return _LinuxDesktopEnvironment.KDE
        return _LinuxDesktopEnvironment.OTHER

    if desktop_session in ('mate', 'gnome'):
        return _LinuxDesktopEnvironment.GNOME
    if 'kde' in desktop_session:
        return _LinuxDesktopEnvironment.KDE
    if 'xfce' in desktop_session:
        return _LinuxDesktopEnvironment.XFCE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
686
687
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend that goes with the detected desktop environment.

    KDE maps to KWallet, an unrecognised environment to basic text and
    everything else to the GNOME keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    special_cases = {
        _LinuxDesktopEnvironment.KDE: _LinuxKeyring.KWALLET,
        _LinuxDesktopEnvironment.OTHER: _LinuxKeyring.BASICTEXT,
    }
    return special_cases.get(desktop_environment, _LinuxKeyring.GNOMEKEYRING)
702
703
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Falls back to 'kdewallet' when the dbus call fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet',
    ]
    try:
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet
        network_wallet = stdout.decode().strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
733
734
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from KWallet via kwallet-query.

    @param browser_keyring_name  name used to build the entry and folder names
    @param logger                logger used to report progress and problems
    @returns                     the password as bytes with one trailing newline
                                 removed, or b'' when it cannot be read
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # note the trailing space after "should be": the literals are concatenated
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        proc = Popen([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.error(f'kwallet-query failed with return code {proc.returncode}. Please consult '
                         'the kwallet-query man page for details')
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this may be a bug as the intended behaviour is to generate a random password and store
            # it, but that doesn't matter here.
            return b''

        logger.debug('password found')
        # strip exactly one trailing newline
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
778
779
780def _get_gnome_keyring_password(browser_keyring_name, logger):
9b8ee23b 781 if not secretstorage:
782 logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
f59f5ef8
MB
783 return b''
784 # the Gnome keyring does not seem to organise keys in the same way as KWallet,
785 # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
786 # and presumably searches for its key in the list. It appears that we must do the same.
787 # https://github.com/jaraco/keyring/issues/556
788 with contextlib.closing(secretstorage.dbus_init()) as con:
789 col = secretstorage.get_default_collection(con)
790 for item in col.get_all_items():
86e5f3ed 791 if item.get_label() == f'{browser_keyring_name} Safe Storage':
f59f5ef8
MB
792 return item.get_secret()
793 else:
794 logger.error('failed to read from keyring')
795 return b''
796
797
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Return the cookie encryption password from the selected Linux keyring.

    ``keyring`` is the name of a _LinuxKeyring member; when it is falsy the
    backend is chosen automatically. Returns None for the basic text backend.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _choose_linux_keyring(logger) if not keyring else _LinuxKeyring[keyring]
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    assert False, f'Unknown keyring {keyring}'
816
817
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password via `security find-generic-password`.

    Returns the command's stdout as bytes with one trailing newline removed,
    or None (after logging a warning) if running the command raises.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        password, _ = proc.communicate_or_kill()
        return password[:-1] if password[-1:] == b'\n' else password
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
835
836
def _get_windows_v10_key(browser_root, logger):
    """Load and decrypt the cookie encryption key stored in the browser's "Local State" file.

    The most recently modified "Local State" file under browser_root is parsed
    as JSON; its os_crypt.encrypted_key entry is base64-decoded, checked for a
    b'DPAPI' prefix, and the remainder is handed to _decrypt_windows_dpapi.
    Returns None (after logging an error) when the file, the key, or the
    prefix is missing.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as f:
        local_state = json.load(f)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    raw_key = compat_b64decode(encoded_key)
    marker = b'DPAPI'
    if raw_key.startswith(marker):
        # strip the marker; only the bytes after it are DPAPI ciphertext
        return _decrypt_windows_dpapi(raw_key[len(marker):], logger)
    logger.error('invalid key')
    return None
856
857
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive key_length bytes from password and salt using PBKDF2 with HMAC-SHA1."""
    return pbkdf2_hmac(
        'sha1',
        password,
        salt,
        iterations,
        dklen=key_length,
    )
860
861
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """AES-CBC decrypt ciphertext, strip PKCS#7 padding and decode the result as text.

    Returns the decoded string, or None (after a once-only warning) when the
    decrypted bytes are not valid UTF-8.
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    raw = unpad_pkcs7(padded)
    try:
        text = raw.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    else:
        return text
869
870
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """AES-GCM decrypt and verify ciphertext, then decode the result as text.

    Returns the decoded string, or None (after a once-only warning) when
    verification raises ValueError or the decrypted bytes are not valid UTF-8.
    """
    try:
        raw = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    # decoding is kept in its own try so that a UnicodeDecodeError (a ValueError
    # subclass) is never reported as a MAC failure
    try:
        text = raw.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    else:
        return text
883
884
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt ciphertext with crypt32's CryptUnprotectData.

    Returns the decrypted bytes, or None (after a once-only warning) when the
    call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    in_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(in_buffer), in_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before releasing the output buffer
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
915
916
917def _config_home():
918 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
919
920
921def _open_database_copy(database_path, tmpdir):
922 # cannot open sqlite databases if they are already in use (e.g. by the browser)
923 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
924 shutil.copy(database_path, database_copy_path)
925 conn = sqlite3.connect(database_copy_path)
926 return conn.cursor()
927
928
929def _get_column_names(cursor, table_name):
86e5f3ed 930 table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
0f06bcd7 931 return [row[1].decode() for row in table_info]
982ee69a
MB
932
933
def _find_most_recently_used_file(root, filename, logger):
    """Return the path of the most recently modified file named filename under root.

    Progress is reported through the progress bar for every file visited.
    Returns None when no file with that name exists below root.
    """
    # if there are multiple browser profiles, take the most recently used one
    matches = []
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _subdirs, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry != filename:
                    continue
                matches.append(os.path.join(directory, entry))
    if not matches:
        return None
    return max(matches, key=lambda candidate: os.lstat(candidate).st_mtime)
945
946
def _merge_cookie_jars(jars):
    """Combine the cookies of all jars into a new YoutubeDLCookieJar.

    The merged jar's filename is that of the last input jar whose filename is
    not None.
    """
    merged = YoutubeDLCookieJar()
    for source in jars:
        for cookie in source:
            merged.set_cookie(cookie)
        if source.filename is None:
            continue
        merged.filename = source.filename
    return merged
955
956
957def _is_path(value):
958 return os.path.sep in value
959
960
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """Validate a browser specification and return it as (browser_name, profile, keyring).

    Raises ValueError when browser_name is not in SUPPORTED_BROWSERS or when
    keyring is neither None nor one of SUPPORTED_KEYRINGS. A profile that looks
    like a path has a leading '~' expanded; any other profile is returned as is.
    """
    browser_supported = browser_name in SUPPORTED_BROWSERS
    if not browser_supported:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    accepted_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in accepted_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    needs_expansion = profile is not None and _is_path(profile)
    resolved_profile = os.path.expanduser(profile) if needs_expansion else profile
    return browser_name, resolved_profile, keyring