]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[ie/commonmistakes] Raise error on blob URLs (#9897)
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
14f25df2 1import base64
b87e01c1 2import collections
f59f5ef8 3import contextlib
c305a25c 4import datetime as dt
cbed249a 5import glob
54007a45 6import http.cookiejar
8817a80d 7import http.cookies
b87e01c1 8import io
982ee69a
MB
9import json
10import os
9bd13fe5 11import re
982ee69a 12import shutil
982ee69a
MB
13import struct
14import subprocess
15import sys
16import tempfile
2e4585da 17import time
b87e01c1 18import urllib.request
f59f5ef8 19from enum import Enum, auto
982ee69a
MB
20from hashlib import pbkdf2_hmac
21
1d3586d0 22from .aes import (
23 aes_cbc_decrypt_bytes,
24 aes_gcm_decrypt_and_verify_bytes,
25 unpad_pkcs7,
26)
2792092a
SS
27from .compat import functools # isort: split
28from .compat import compat_os_name
9b8ee23b 29from .dependencies import (
30 _SECRETSTORAGE_UNAVAILABLE_REASON,
31 secretstorage,
32 sqlite3,
33)
97ec5bc5 34from .minicurses import MultilinePrinter, QuietMultilinePrinter
d2c8aadf 35from .utils import (
2792092a 36 DownloadError,
d2c8aadf 37 Popen,
d2c8aadf 38 error_to_str,
39 expand_path,
22df97f9 40 is_path_like,
b87e01c1 41 sanitize_url,
42 str_or_none,
d2c8aadf 43 try_call,
b87e01c1 44 write_string,
d2c8aadf 45)
1b392f90 46from .utils._utils import _YDLLogger
4bf91228 47from .utils.networking import normalize_url
982ee69a 48
982ee69a
MB
# Browser names that extract_cookies_from_browser() routes to the Chromium extractor
CHROMIUM_BASED_BROWSERS = {
    'brave',
    'chrome',
    'chromium',
    'edge',
    'opera',
    'vivaldi',
}
# Chromium-based browsers plus the two browsers that have dedicated extractors
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
51
52
class YDLLogger(_YDLLogger):
    """Logger used during cookie extraction; can additionally hand out a progress printer."""

    def warning(self, message, only_once=False):  # compat
        # Forward the legacy `only_once` keyword to the parent's `once` keyword
        return super().warning(message, once=only_once)

    class ProgressBar(MultilinePrinter):
        """Printer that writes progress messages to line 0, at most once per `_DELAY` seconds."""
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            # Drop the message when the previous one was printed within the delay window
            if time.time() - self._timer <= self._DELAY:
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        # Do not print to files/pipes, loggers, or when --no-progress is used
        ydl = self._ydl
        if not ydl or ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None
        stream = ydl._out_files.error
        try:
            is_terminal = stream.isatty()
        except BaseException:
            return None
        if not is_terminal:
            return None
        return self.ProgressBar(stream, preserve_output=False)
97ec5bc5 77
78
79def _create_progress_bar(logger):
80 if hasattr(logger, 'progress_bar'):
81 printer = logger.progress_bar()
82 if printer:
83 return printer
84 printer = QuietMultilinePrinter()
85 printer.print = lambda _: None
86 return printer
87
982ee69a
MB
88
def load_cookies(cookie_file, browser_specification, ydl):
    """
    Build one cookie jar from a browser and/or a cookie file.

    Cookies extracted from the browser (when `browser_specification` is given) come first,
    followed by the jar for `cookie_file` (when given); both are handed to
    `_merge_cookie_jars` and its result is returned.
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
        jars.append(browser_jar)

    if cookie_file is not None:
        given_as_path = is_path_like(cookie_file)
        if given_as_path:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # A path is only loaded when it is readable; anything that is not a path is always loaded
        should_load = os.access(cookie_file, os.R_OK) if given_as_path else True
        if should_load:
            file_jar.load()
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
107
108
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """
    Dispatch to the extractor matching `browser_name`.

    `container` is only passed on for firefox and `keyring` only for the Chromium-based
    browsers. Raises ValueError for a browser name that is not handled here.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
118
119
def _extract_firefox_cookies(profile, container, logger):
    """
    Load cookies from the Firefox `cookies.sqlite` database selected by `_newest`.

    profile:   None searches every directory from `_firefox_browser_dirs()`; a value accepted
               by `_is_path` is searched directly; anything else is treated as a profile name
               joined onto each of the default directories.
    container: None loads every cookie, 'none' loads only cookies whose originAttributes have
               no `userContextId=`, any other value is resolved through the `containers.json`
               located next to the cookie database.
    logger:    object providing info/debug/warning (and optionally `progress_bar`).

    Returns a YoutubeDLCookieJar (an empty one when sqlite3 is unavailable).
    Raises FileNotFoundError when no cookie database or no readable containers.json is found,
    and ValueError when the requested container cannot be resolved to an integer ID.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_roots = list(_firefox_browser_dirs())
    elif _is_path(profile):
        search_roots = [profile]
    else:
        search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
    # Only used for error messages
    search_root = ', '.join(map(repr, search_roots))

    cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        with open(containers_path, encoding='utf8') as containers:
            identities = json.load(containers).get('identities', [])

        l10n_id_re = r'userContext([^\.]+)\.label'
        # First pass: match the container's name or its complete l10nID
        container_id = next((context.get('userContextId') for context in identities if container in (
            context.get('name'),
            try_call(lambda: re.fullmatch(l10n_id_re, context['l10nID']).group()),
        )), None)
        if not isinstance(container_id, int):
            # Second pass: also accept the bare label captured by the pattern
            # (e.g. "Personal" for an l10nID of "userContextPersonal.label").
            # It only runs when the first pass found nothing usable, so every
            # name that resolved before still resolves to the same container.
            container_id = next((
                context.get('userContextId') for context in identities
                if container == try_call(lambda: re.fullmatch(l10n_id_re, context['l10nID']).group(1))
            ), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # The needle is bound as a parameter: a double-quoted SQL token is an identifier,
                # and SQLite only treats it as a string literal when built with that legacy quirk
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes, ?)',
                    ('userContextId=',))
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            # Close the connection even when the query or the cookie loading failed
            if cursor is not None:
                cursor.connection.close()
187
188
cbed249a 189def _firefox_browser_dirs():
dec30912 190 if sys.platform in ('cygwin', 'win32'):
cbed249a
SS
191 yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
192
982ee69a 193 elif sys.platform == 'darwin':
cbed249a
SS
194 yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
195
196 else:
2ab2651a
DM
197 yield from map(os.path.expanduser, (
198 '~/.mozilla/firefox',
199 '~/snap/firefox/common/.mozilla/firefox',
200 '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
201 ))
cbed249a
SS
202
203
204def _firefox_cookie_dbs(roots):
205 for root in map(os.path.abspath, roots):
206 for pattern in ('', '*/', 'Profiles/*/'):
207 yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
982ee69a
MB
208
209
def _get_chromium_based_browser_settings(browser_name):
    """
    Return a dict describing a Chromium-based browser on the current platform:
    'browser_dir' (its data directory), 'keyring_name' and 'supports_profiles'.

    Raises KeyError when `browser_name` is not one of the names in the tables below.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    is_mac = sys.platform == 'darwin'

    if sys.platform in ('cygwin', 'win32'):
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        # opera is the only entry that lives below the roaming directory
        base_dir, relative_dir = {
            'brave': (appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (appdata_local, R'Google\Chrome\User Data'),
            'chromium': (appdata_local, R'Chromium\User Data'),
            'edge': (appdata_local, R'Microsoft\Edge\User Data'),
            'opera': (appdata_roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (appdata_local, R'Vivaldi\User Data'),
        }[browser_name]

    elif is_mac:
        base_dir = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]

    else:
        base_dir = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]

    browser_dir = os.path.join(base_dir, relative_dir)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if is_mac else 'Chromium',
        'opera': 'Opera' if is_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if is_mac else 'Chrome',
    }[browser_name]

    browsers_without_profiles = {'opera'}
    supports_profiles = browser_name not in browsers_without_profiles

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': supports_profiles,
    }
264
265
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """
    Load cookies from the `Cookies` database of a Chromium-based browser.

    `profile` may be None (search the browser's directory), a path, or a profile name;
    a profile name given for a browser without profile support is reported through
    `logger.error` and the browser's directory is searched instead.

    Returns a YoutubeDLCookieJar (an empty one when sqlite3 is unavailable).
    Raises FileNotFoundError when no database is found, and DownloadError for the
    errno 13 PermissionError on 'nt'; any other PermissionError is re-raised as is.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    supports_profiles = settings['supports_profiles']

    if profile is None:
        search_root = settings['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # The decryptor is given the directory above the profile when profiles are supported
        settings['browser_dir'] = os.path.dirname(profile) if supports_profiles else profile
    elif supports_profiles:
        search_root = os.path.join(settings['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = settings['browser_dir']

    cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(settings['browser_dir'], settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Text columns are returned as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            columns = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in columns else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')

            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                row_count = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{row_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)

            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        except PermissionError as error:
            if compat_os_name == 'nt' and error.errno == 13:
                message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
                logger.error(message)
                raise DownloadError(message)  # force exit
            raise
        finally:
            if cursor is not None:
                cursor.connection.close()
336
337
97ec5bc5 338def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
0f06bcd7 339 host_key = host_key.decode()
340 name = name.decode()
341 value = value.decode()
342 path = path.decode()
97ec5bc5 343 is_encrypted = not value and encrypted_value
344
345 if is_encrypted:
346 value = decryptor.decrypt(encrypted_value)
347 if value is None:
348 return is_encrypted, None
349
ac668111 350 return is_encrypted, http.cookiejar.Cookie(
97ec5bc5 351 version=0, name=name, value=value, port=None, port_specified=False,
352 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
353 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
354 comment=None, comment_url=None, rest={})
355
356
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class of the per-platform Chromium cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
                - also attempts empty password if decryption fails
            - v11: AES-CBC encrypted with an OS protected key (keyring)
                - also attempts empty password if decryption fails
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Per-version counters; each subclass replaces this with its own dict in __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one `encrypted_value` blob; every subclass has to override this."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 389
982ee69a 390
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """
    Create the decryptor for the current platform: the Mac one on darwin, the Windows one
    on win32/cygwin and the Linux one everywhere else (`keyring` is only used by the latter).
    """
    platform = sys.platform
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
982ee69a
MB
397
398
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10/v11 cookie values on Linux; the v11 key is only fetched on first use."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        self._v10_key = self.derive_key(b'peanuts')
        self._empty_key = self.derive_key(b'')

    @functools.cached_property
    def _v11_key(self):
        # Computed once and cached; None when no keyring password is available
        password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        if password is None:
            return None
        return self.derive_key(password)

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """

        following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
        with an empty password. The failure detection is not the same as what chromium uses so the
        results won't be perfect

        References:
            - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
                - a bugfix to try an empty password as a fallback
        """
        # The first three bytes carry the version tag, the remainder is the ciphertext
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            v11_key = self._v11_key
            if v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc_multi(ciphertext, (v11_key, self._empty_key), self._logger)

        self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
        self._cookie_counts['other'] += 1
        return None
448
449
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10 cookie values on macOS; values without the v10 tag are returned as given."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        self._cookie_counts = {'v10': 0, 'other': 0}
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        # Without a keyring password there is no key, and v10 cookies cannot be decrypted
        self._v10_key = self.derive_key(password) if password is not None else None

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted v10 value, None when no key is available, or the input for other prefixes."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)
480
481
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for Windows: AES-GCM for v10 values, DPAPI for every other prefix."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """
        Decrypt one `encrypted_value` blob.

        Returns the result of the AES-GCM helper for v10 values and the decoded DPAPI
        result for everything else; returns None when the v10 key is missing or when
        the DPAPI helper yields None.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the version tag: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): the DPAPI helper is defined outside this block; presumably it returns
            # None on failure like the other decrypt helpers - confirm. Without this guard such a
            # result would raise AttributeError instead of being counted as an undecryptable cookie.
            return None if plaintext is None else plaintext.decode()
982ee69a
MB
517
518
519def _extract_safari_cookies(profile, logger):
982ee69a 520 if sys.platform != 'darwin':
86e5f3ed 521 raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a 522
a58182b7
NV
523 if profile:
524 cookies_path = os.path.expanduser(profile)
525 if not os.path.isfile(cookies_path):
526 raise FileNotFoundError('custom safari cookies database not found')
527
528 else:
529 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
982ee69a 530
1f7db853 531 if not os.path.isfile(cookies_path):
a58182b7
NV
532 logger.debug('Trying secondary cookie location')
533 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
534 if not os.path.isfile(cookies_path):
535 raise FileNotFoundError('could not find safari cookies database')
982ee69a
MB
536
537 with open(cookies_path, 'rb') as f:
538 cookies_data = f.read()
539
540 jar = parse_safari_cookies(cookies_data, logger=logger)
86e5f3ed 541 logger.info(f'Extracted {len(jar)} cookies from safari')
982ee69a
MB
542 return jar
543
544
class ParserError(Exception):
    """Raised by DataParser on an invalid read or skip, or when an expected value is not found."""
547
548
class DataParser:
    """Sequential reader over a bytes buffer; `cursor` is the offset of the next unread byte."""

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next `num_bytes` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume `len(expected_value)` bytes and raise ParserError unless they equal it."""
        actual = self.read_bytes(len(expected_value))
        if actual != expected_value:
            raise ParserError(f'unexpected value: {actual} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless `big_endian`)."""
        byte_order = '>' if big_endian else '<'
        return struct.unpack(f'{byte_order}I', self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little-endian unless `big_endian`)."""
        byte_order = '>' if big_endian else '<'
        return struct.unpack(f'{byte_order}d', self.read_bytes(8))[0]

    def read_cstring(self):
        """Read single bytes up to and including the next NUL and return the decoded text before it."""
        # iter() with a sentinel stops as soon as a lone NUL byte is read
        characters = iter(lambda: self.read_bytes(1), b'\x00')
        return b''.join(characters).decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume `num_bytes` bytes and log them at debug level; skipping 0 bytes does nothing."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward until the cursor is at the absolute `offset`."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is still unread."""
        self.skip_to(len(self._data), description)
598
599
600def _mac_absolute_time_to_posix(timestamp):
c305a25c 601 return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=timestamp)).timestamp())
982ee69a
MB
602
603
def _parse_safari_cookies_header(data, logger):
    """
    Parse the file header: the `cook` signature, a big-endian page count and one
    big-endian size per page. Returns `(page_sizes, offset of the first byte after the header)`.
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
610
611
def _parse_safari_cookies_page(data, jar, logger):
    """
    Parse one page: the page signature, a cookie count, one record offset per cookie,
    then the records themselves. Parsed cookies are added to `jar`; nothing is returned.
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record is parsed from its own slice; the page parser then steps over it
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
630
631
def _parse_safari_cookies_record(data, jar, logger):
    """
    Parse the cookie record at the start of `data`, add it to `jar` and return the record size.

    A record whose strings cannot be decoded is not added to the jar (a warning is
    logged instead), but its size is still returned.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    # Offsets of the four NUL-terminated strings, relative to the start of the record
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        strings = []
        for string_offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(string_offset)
            strings.append(parser.read_cstring())
        domain, name, path, value = strings
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
672
673
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari binary cookies file into `jar` (a new YoutubeDLCookieJar
    when none is given) and return the jar.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    if jar is None:
        jar = YoutubeDLCookieJar()

    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
689
690
f59f5ef8
MB
691class _LinuxDesktopEnvironment(Enum):
692 """
693 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
694 DesktopEnvironment
695 """
696 OTHER = auto()
697 CINNAMON = auto()
b38d4c94 698 DEEPIN = auto()
f59f5ef8 699 GNOME = auto()
b38d4c94
MB
700 KDE3 = auto()
701 KDE4 = auto()
702 KDE5 = auto()
703 KDE6 = auto()
f59f5ef8 704 PANTHEON = auto()
b38d4c94 705 UKUI = auto()
f59f5ef8
MB
706 UNITY = auto()
707 XFCE = auto()
b38d4c94 708 LXQT = auto()
982ee69a
MB
709
710
f59f5ef8
MB
711class _LinuxKeyring(Enum):
712 """
b38d4c94 713 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
f59f5ef8
MB
714 SelectedLinuxBackend
715 """
2e023649 716 KWALLET = auto() # KDE4
b38d4c94
MB
717 KWALLET5 = auto()
718 KWALLET6 = auto()
2e023649 719 GNOMEKEYRING = auto()
720 BASICTEXT = auto()
f59f5ef8
MB
721
722
# Member names of `_LinuxKeyring`, as a dict keys view
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
724
725
def _get_linux_desktop_environment(env, logger):
    """
    Detect the desktop environment from the environment mapping `env`.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment

    Three sources are consulted in order and the first recognised value wins:
    every colon-separated entry of XDG_CURRENT_DESKTOP, then DESKTOP_SESSION, then
    the legacy GNOME_DESKTOP_SESSION_ID / KDE_FULL_SESSION variables. An unrecognised
    value is logged and the next source is tried.

    Returns a `_LinuxDesktopEnvironment` member (OTHER when nothing is recognised).
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        # The variable may hold several colon-separated values in priority order
        # (e.g. "ubuntu:GNOME"), so every entry is checked rather than only the first
        for part in map(str.strip, xdg_current_desktop.split(':')):
            if part == 'Unity':
                if desktop_session is not None and 'gnome-fallback' in desktop_session:
                    return _LinuxDesktopEnvironment.GNOME
                return _LinuxDesktopEnvironment.UNITY
            elif part == 'Deepin':
                return _LinuxDesktopEnvironment.DEEPIN
            elif part == 'GNOME':
                return _LinuxDesktopEnvironment.GNOME
            elif part == 'X-Cinnamon':
                return _LinuxDesktopEnvironment.CINNAMON
            elif part == 'KDE':
                kde_version = env.get('KDE_SESSION_VERSION', None)
                if kde_version == '5':
                    return _LinuxDesktopEnvironment.KDE5
                elif kde_version == '6':
                    return _LinuxDesktopEnvironment.KDE6
                elif kde_version == '4':
                    return _LinuxDesktopEnvironment.KDE4
                else:
                    logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
                    return _LinuxDesktopEnvironment.KDE4
            elif part == 'Pantheon':
                return _LinuxDesktopEnvironment.PANTHEON
            elif part == 'XFCE':
                return _LinuxDesktopEnvironment.XFCE
            elif part == 'UKUI':
                return _LinuxDesktopEnvironment.UKUI
            elif part == 'LXQt':
                return _LinuxDesktopEnvironment.LXQT
        logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')

    # Not an `elif`: an unrecognised XDG_CURRENT_DESKTOP must not prevent this check
    if desktop_session is not None:
        if desktop_session == 'deepin':
            return _LinuxDesktopEnvironment.DEEPIN
        elif desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif desktop_session in ('kde4', 'kde-plasma'):
            return _LinuxDesktopEnvironment.KDE4
        elif desktop_session == 'kde':
            if 'KDE_SESSION_VERSION' in env:
                return _LinuxDesktopEnvironment.KDE4
            return _LinuxDesktopEnvironment.KDE3
        elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
            return _LinuxDesktopEnvironment.XFCE
        elif desktop_session == 'ukui':
            return _LinuxDesktopEnvironment.UKUI
        else:
            logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')

    # Last resort: older environment variables
    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        if 'KDE_SESSION_VERSION' in env:
            return _LinuxDesktopEnvironment.KDE4
        return _LinuxDesktopEnvironment.KDE3
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
797
798
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend matching the detected Linux desktop environment.

    Corresponds to SelectBackend in [1].

    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.

    @param logger   logger used to report the detected desktop environment
    @returns        a _LinuxKeyring member

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
    """
    detected = _get_linux_desktop_environment(os.environ, logger)
    logger.debug(f'detected desktop environment: {detected.name}')

    # Each KDE generation ships its own KWallet service
    if detected == _LinuxDesktopEnvironment.KDE4:
        return _LinuxKeyring.KWALLET
    if detected == _LinuxDesktopEnvironment.KDE5:
        return _LinuxKeyring.KWALLET5
    if detected == _LinuxDesktopEnvironment.KDE6:
        return _LinuxKeyring.KWALLET6

    # These environments get the plain-text (no keyring) backend
    if detected in (
        _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
    ):
        return _LinuxKeyring.BASICTEXT

    # Everything else falls back to the GNOME keyring
    return _LinuxKeyring.GNOMEKEYRING
826
827
def _get_kwallet_network_wallet(keyring, logger):
    """ Return the name of the wallet used to store network passwords.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet

    Falls back to 'kdewallet' when the dbus call fails, when the keyring is not
    one of the KWallet variants, or when any other exception occurs.
    """
    fallback_wallet = 'kdewallet'
    try:
        # The dbus service name and object path share the same daemon name
        if keyring == _LinuxKeyring.KWALLET:
            daemon = 'kwalletd'
        elif keyring == _LinuxKeyring.KWALLET5:
            daemon = 'kwalletd5'
        elif keyring == _LinuxKeyring.KWALLET6:
            daemon = 'kwalletd6'
        else:
            raise ValueError(keyring)

        command = [
            'dbus-send', '--session', '--print-reply=literal',
            f'--dest=org.kde.{daemon}',
            f'/modules/{daemon}',
            'org.kde.KWallet.networkWallet',
        ]
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return fallback_wallet

        wallet_name = stdout.strip()
        logger.debug(f'NetworkWallet = "{wallet_name}"')
        return wallet_name
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return fallback_wallet
867
868
b38d4c94
MB
def _get_kwallet_password(browser_keyring_name, keyring, logger):
    """
    Read the browser's "Safe Storage" password from KWallet using `kwallet-query`.

    @param browser_keyring_name  browser name used as prefix of the wallet entry and folder
    @param keyring               the selected KWallet keyring (its `name` is logged and it is
                                 passed on to look up the network wallet)
    @param logger                logger for debug/warning/error output
    @returns                     the password as bytes; b'' if kwallet-query is missing, fails,
                                 reports that the entry could not be read, or raises
    """
    logger.debug(f'using kwallet-query to obtain password from {keyring.name}')

    if shutil.which('kwallet-query') is None:
        # NB: adjacent literals are concatenated, so each piece needs its own trailing space
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(keyring, logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this was identified as a bug later and fixed in
                # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
                # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
                return b''
            else:
                logger.debug('password found')
                return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
910
911
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Look up the browser's "Safe Storage" secret in the default secretstorage collection.

    @returns  the secret of the first item labelled '<browser_keyring_name> Safe Storage';
              b'' if secretstorage is unavailable or no such item exists
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    wanted_label = f'{browser_keyring_name} Safe Storage'
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for entry in collection.get_all_items():
            if entry.get_label() == wanted_label:
                return entry.get_secret()

        # No item carried the expected label
        logger.error('failed to read from keyring')
        return b''
928
929
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Obtain the browser's keyring password on Linux.

    @param keyring  name of a _LinuxKeyring member, or a falsy value to auto-detect
    @returns        the password bytes from the chosen backend, or None for BASICTEXT
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    selected = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {selected.name}')

    if selected in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
        return _get_kwallet_password(browser_keyring_name, selected, logger)
    if selected == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if selected == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {selected}'
948
949
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the browser's "Safe Storage" password via `security find-generic-password`.

    @returns  the password bytes with trailing newlines removed, or None if the
              command exits non-zero or an exception is raised
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        stdout, _, returncode = Popen.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if not returncode:
            return stdout.rstrip(b'\n')
        logger.warning('find-generic-password failed')
        return None
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
966
967
def _get_windows_v10_key(browser_root, logger):
    """
    Extract and decrypt the cookie encryption key from the newest `Local State` file.

    @returns  the result of DPAPI-decrypting the stored key, or None if the file,
              the key entry or the expected key prefix is missing

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
    """
    local_state_path = _newest(_find_files(browser_root, 'Local State', logger))
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as f:
        local_state = json.load(f)

    try:
        # kOsCryptEncryptedKeyPrefName in [1]
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    key_blob = base64.b64decode(encoded_key)
    # kDPAPIKeyPrefix in [1]
    dpapi_prefix = b'DPAPI'
    if key_blob.startswith(dpapi_prefix):
        return _decrypt_windows_dpapi(key_blob[len(dpapi_prefix):], logger)

    logger.error('invalid key')
    return None
993
994
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of `key_length` bytes with PBKDF2-HMAC-SHA1"""
    return pbkdf2_hmac(
        'sha1',
        password,
        salt,
        iterations,
        dklen=key_length,
    )
997
998
b38d4c94
MB
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
    """
    Try each key in turn and return the first plaintext that decodes as UTF-8.

    @returns  the decoded plaintext, or None (after a one-time warning) if no key
              produced decodable text
    """
    for candidate_key in keys:
        decrypted = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, candidate_key, initialization_vector))
        # Undecodable output is taken to mean "wrong key": move on to the next one
        with contextlib.suppress(UnicodeDecodeError):
            return decrypted.decode()

    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
    return None
982ee69a
MB
1008
1009
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and authenticate an AES-GCM encrypted cookie value.

    @returns  the UTF-8 decoded plaintext, or None (after a one-time warning) if
              the MAC check or the decoding fails
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        text = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    else:
        return text
1022
1023
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` with the Windows Data Protection API.

    @returns  the decrypted bytes, or None (after a one-time warning) if
              CryptUnprotectData reports failure

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # Copy the output into Python-owned bytes before handing the buffer to LocalFree
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
1056
1057
def _config_home():
    """
    Return the user's configuration directory.

    Uses $XDG_CONFIG_HOME when it is set to a non-empty value; otherwise falls
    back to ~/.config. An empty value is treated like an unset one, as required
    by the XDG Base Directory Specification.
    """
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
1060
1061
def _open_database_copy(database_path, tmpdir):
    """
    Copy the database to `tmpdir`/temporary.sqlite and return a cursor on the copy.

    A copy is used since sqlite databases cannot be opened if they are already
    in use (e.g. by the browser).
    """
    copy_target = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_target)
    return sqlite3.connect(copy_target).cursor()
1068
1069
def _get_column_names(cursor, table_name):
    """
    Return the column names of `table_name` as a list of str.

    The names reported by `PRAGMA table_info` are decoded, so they are expected
    to be bytes here.
    """
    name_index = 1  # position of the column name within a table_info row
    columns = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    return [column[name_index].decode() for column in columns]
982ee69a
MB
1073
1074
cbed249a
SS
def _newest(files):
    """Return the most recently modified path among `files`, or None if there are none"""
    def modified_at(path):
        # lstat: a symlink's own timestamp is used, the link is not followed
        return os.lstat(path).st_mtime

    return max(files, default=None, key=modified_at)
1077
1078
def _find_files(root, filename, logger):
    """
    Lazily yield the path of every file named `filename` below `root`.

    A progress bar reporting the number of files inspected so far is kept open
    for as long as the generator is being consumed.
    """
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, names in os.walk(root):
            for name in names:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if name != filename:
                    continue
                yield os.path.join(directory, name)
982ee69a
MB
1089
1090
def _merge_cookie_jars(jars):
    """
    Combine several cookie jars into a new YoutubeDLCookieJar.

    Cookies are copied in order, and the resulting jar takes the filename of
    the last jar that has one.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)

        if source_jar.filename is None:
            continue
        merged.filename = source_jar.filename
    return merged
1099
1100
def _is_path(value):
    """Return whether `value` contains a path separator of the current platform"""
    for separator in (os.path.sep, os.path.altsep):
        # altsep is None on platforms with a single separator
        if separator and separator in value:
            return True
    return False
982ee69a
MB
1103
1104
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification and normalize its profile.

    @returns            the tuple (browser_name, profile, keyring, container), where
                        a profile that expands to a path is replaced by the expanded path
    @raises ValueError  if the browser or the keyring is not supported
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring not in (None, *SUPPORTED_KEYRINGS):
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        expanded_profile = expand_path(profile)
        # Plain profile names are kept as given; only paths get expanded
        if _is_path(expanded_profile):
            profile = expanded_profile
    return browser_name, profile, keyring, container
8817a80d
SS
1113
1114
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie"""
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    # Attribute names that modify the preceding cookie instead of starting a new one
    _RESERVED = {
        'expires',
        'path',
        'comment',
        'domain',
        'max-age',
        'secure',
        'httponly',
        'version',
        'samesite',
    }

    # Reserved attributes that may appear without a value
    _FLAGS = {'secure', 'httponly'}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """
        Load cookies from `data`, skipping invalid entries instead of failing.

        Non-string input is handed to SimpleCookie.load unchanged. For strings,
        an entry with an invalid value is dropped together with the attributes
        that follow it, while the remaining cookies are still loaded.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # Morsel that following attributes get attached to, if any
        current = None
        for found in self._COOKIE_PATTERN.finditer(data):
            if found.group('bad'):
                current = None
                continue

            name, raw_value = found.group('key', 'val')

            is_attribute = name.startswith('$')
            if is_attribute:
                name = name[1:]

            normalized = name.lower()
            if normalized in self._RESERVED:
                # An attribute without a cookie to attach to is ignored
                if current is None:
                    continue

                if raw_value is not None:
                    attribute_value, _ = self.value_decode(raw_value)
                elif normalized in self._FLAGS:
                    attribute_value = True
                else:
                    # A non-flag attribute needs a value; stop attaching to this cookie
                    current = None
                    continue

                current[name] = attribute_value

            elif is_attribute or raw_value is None:
                # Unknown `$attribute`, or a bare name with no value
                current = None

            else:
                current = self.get(name, http.cookies.Morsel())
                decoded, encoded = self.value_decode(raw_value)
                current.set(name, decoded, encoded)
                self[name] = current
b87e01c1 1204
1205
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    Netscape-format cookie jar that can be backed by a path or by a text stream.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a cookie file entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        # The parent is initialized without a filename; it is set below so that
        # non-path values (e.g. streams) can be stored as well
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text stream for `file`.

        A path-like `file` is opened (and closed on exit) as UTF-8, for writing
        if `write` is set. Any other object is yielded as-is and is not closed;
        when `write` is set it is emptied first.
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() does not move the stream position. Rewind first, or
                # a stream that was read before would get padding in front of
                # the newly written data
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard, ignore_expires):
        """Write one tab-separated line per cookie to `f`, honouring the ignore flags"""
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        NB: cookies in the jar without an expiry get `expires` set to 0 in place.

        @raises ValueError  if neither `filename` nor `self.filename` is set
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Load cookies from a file.

        Malformed entries are skipped with a warning; a file that looks like
        JSON raises http.cookiejar.LoadError instead.

        @raises ValueError  if neither `filename` nor `self.filename` is set
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly marker so that the entry is parsed as a regular one
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Validated lines are collected in memory and parsed by the parent class
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # The padding space keeps the index valid for blank lines
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        cookie_req = urllib.request.Request(normalize_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')

    def get_cookies_for_url(self, url):
        """Generate a list of Cookie objects for a given url"""
        # Policy `_now` attribute must be set before calling `_cookies_for_request`
        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
        self._policy._now = self._now = int(time.time())
        return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url))))

    def clear(self, *args, **kwargs):
        """Like CookieJar.clear, but a KeyError for missing cookies is suppressed"""
        with contextlib.suppress(KeyError):
            return super().clear(*args, **kwargs)