]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[ie/brightcove] Upgrade requests to HTTPS (#10202)
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
14f25df2 1import base64
b87e01c1 2import collections
f59f5ef8 3import contextlib
c305a25c 4import datetime as dt
cbed249a 5import glob
54007a45 6import http.cookiejar
8817a80d 7import http.cookies
b87e01c1 8import io
982ee69a
MB
9import json
10import os
9bd13fe5 11import re
982ee69a 12import shutil
982ee69a
MB
13import struct
14import subprocess
15import sys
16import tempfile
2e4585da 17import time
b87e01c1 18import urllib.request
f59f5ef8 19from enum import Enum, auto
982ee69a
MB
20from hashlib import pbkdf2_hmac
21
1d3586d0 22from .aes import (
23 aes_cbc_decrypt_bytes,
24 aes_gcm_decrypt_and_verify_bytes,
25 unpad_pkcs7,
26)
2792092a
SS
27from .compat import functools # isort: split
28from .compat import compat_os_name
9b8ee23b 29from .dependencies import (
30 _SECRETSTORAGE_UNAVAILABLE_REASON,
31 secretstorage,
32 sqlite3,
33)
97ec5bc5 34from .minicurses import MultilinePrinter, QuietMultilinePrinter
d2c8aadf 35from .utils import (
2792092a 36 DownloadError,
d2c8aadf 37 Popen,
d2c8aadf 38 error_to_str,
39 expand_path,
22df97f9 40 is_path_like,
b87e01c1 41 sanitize_url,
42 str_or_none,
d2c8aadf 43 try_call,
b87e01c1 44 write_string,
d2c8aadf 45)
1b392f90 46from .utils._utils import _YDLLogger
4bf91228 47from .utils.networking import normalize_url
982ee69a 48
# Browser names that are routed to the shared Chromium cookie extraction code
CHROMIUM_BASED_BROWSERS = {
    'brave',
    'chrome',
    'chromium',
    'edge',
    'opera',
    'vivaldi',
    'whale',
}
# Every browser name accepted for cookie extraction
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS.union({'firefox', 'safari'})
51
52
class YDLLogger(_YDLLogger):
    """Logger used by the cookie code; adds an optional single-line progress printer."""

    def warning(self, message, only_once=False):  # compat
        """Forward to the parent ``warning``, mapping the ``only_once`` keyword onto ``once``."""
        return super().warning(message, once=only_once)

    class ProgressBar(MultilinePrinter):
        """Printer that redraws line 0 at most once every ``_DELAY`` seconds."""
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            """Print ``message`` prefixed with ``[Cookies]`` unless the last redraw was too recent."""
            if time.time() - self._timer > self._DELAY:
                self.print_at_line(f'[Cookies] {message}', 0)
                self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        Returns None when no progress output should be produced.
        """
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
            return None
        file = self._ydl._out_files.error
        try:
            if not file.isatty():
                return None
        except Exception:
            # Streams without a usable isatty() are treated as non-terminals.
            # KeyboardInterrupt/SystemExit are deliberately allowed to propagate
            return None
        return self.ProgressBar(file, preserve_output=False)
97ec5bc5 77
78
def _create_progress_bar(logger):
    """Return the logger's progress printer, or a silent replacement when it offers none.

    The logger's ``progress_bar()`` result is used if the method exists and returns a
    truthy value; otherwise a ``QuietMultilinePrinter`` with a no-op ``print`` is returned.
    """
    printer = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not printer:
        printer = QuietMultilinePrinter()
        printer.print = lambda _: None
    return printer
87
982ee69a
MB
88
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser specification and/or a cookie file.

    @param cookie_file            None, or the value handed to YoutubeDLCookieJar; when it is
                                  path-like it is expanded and only loaded if readable
    @param browser_specification  None, or the arguments for _parse_browser_specification
    @param ydl                    object passed to YDLLogger for logging
    @returns                      the result of merging all collected jars
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
        jars.append(browser_jar)

    if cookie_file is not None:
        from_path = is_path_like(cookie_file)
        if from_path:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # An unreadable path still yields an (empty) jar bound to that file
        if not from_path or os.access(cookie_file, os.R_OK):
            file_jar.load()
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
107
108
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch cookie extraction to the implementation matching ``browser_name``.

    ``container`` is only forwarded for firefox and ``keyring`` only for Chromium based
    browsers. Raises ValueError for a browser name that is not handled.
    """
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
118
119
def _extract_firefox_cookies(profile, container, logger):
    """Read cookies from the newest firefox ``cookies.sqlite`` that can be found.

    @param profile    None (search the default firefox directories), a path (per _is_path),
                      or a profile name joined onto each default directory
    @param container  None (all cookies), 'none' (only cookies outside any container), or a
                      container name as found in containers.json
    @param logger     logger used for info/debug/warning output and the progress bar
    @returns          a YoutubeDLCookieJar (empty when sqlite3 is unavailable)
    @raises FileNotFoundError  no cookie database, or containers.json missing/unreadable
    @raises ValueError         the requested container is not listed in containers.json
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_roots = list(_firefox_browser_dirs())
    elif _is_path(profile):
        search_roots = [profile]
    else:
        search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
    search_root = ', '.join(map(repr, search_roots))

    cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        with open(containers_path, encoding='utf8') as containers:
            identities = json.load(containers).get('identities', [])
        # A container matches either by its explicit name or by the name embedded in its
        # l10nID ("userContext<Name>.label"); group(1) extracts <Name> rather than the whole label
        container_id = next((context.get('userContextId') for context in identities if container in (
            context.get('name'),
            try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group(1)),
        )), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                # The ID is matched either at the end of originAttributes or followed by '&'
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # Single quotes: standard SQL string literal (double quotes denote identifiers)
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,\'userContextId=\')')
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            # Always release the database connection, even when extraction fails
            if cursor is not None:
                cursor.connection.close()
187
188
def _firefox_browser_dirs():
    """Yield the default firefox data directories for the current platform."""
    platform = sys.platform
    if platform in ('cygwin', 'win32'):
        yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
        return

    if platform == 'darwin':
        yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
        return

    # Everything else: regular, snap and flatpak install locations
    for location in (
        '~/.mozilla/firefox',
        '~/snap/firefox/common/.mozilla/firefox',
        '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
    ):
        yield os.path.expanduser(location)
cbed249a
SS
202
203
def _firefox_cookie_dbs(roots):
    """Yield every ``cookies.sqlite`` found directly in, one level below, or under
    ``Profiles/*/`` of each of the given root directories."""
    sub_patterns = ('', '*/', 'Profiles/*/')
    for root in roots:
        absolute_root = os.path.abspath(root)
        for sub_pattern in sub_patterns:
            database_glob = os.path.join(absolute_root, sub_pattern, 'cookies.sqlite')
            for database_path in glob.iglob(database_glob):
                yield database_path
982ee69a
MB
208
209
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a Chromium based browser.

    @returns  dict with the keys 'browser_dir', 'keyring_name' and 'supports_profiles'
    @raises KeyError  when ``browser_name`` is not one of the known browsers
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    is_darwin = sys.platform == 'darwin'

    if sys.platform in ('cygwin', 'win32'):
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        base_dir, relative_dir = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
            'whale': (local, R'Naver\Naver Whale\User Data'),
        }[browser_name]

    elif is_darwin:
        base_dir = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
            'whale': 'Naver/Whale',
        }[browser_name]

    else:
        base_dir = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
            'whale': 'naver-whale',
        }[browser_name]

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_names = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if is_darwin else 'Chromium',
        'opera': 'Opera' if is_darwin else 'Chromium',
        'vivaldi': 'Vivaldi' if is_darwin else 'Chrome',
        'whale': 'Whale',
    }

    profileless_browsers = {'opera'}

    return {
        'browser_dir': os.path.join(base_dir, relative_dir),
        'keyring_name': keyring_names[browser_name],
        'supports_profiles': browser_name not in profileless_browsers,
    }
268
269
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt the cookies of a Chromium based browser.

    @param browser_name  key understood by _get_chromium_based_browser_settings
    @param profile       None, a path (per _is_path) or a profile name
    @param keyring       forwarded to get_cookie_decryptor
    @param logger        logger used for output and the progress bar
    @returns             a YoutubeDLCookieJar (empty when sqlite3 is unavailable)
    @raises FileNotFoundError  when no 'Cookies' database is found below the search root
    @raises DownloadError      on Windows when copying the database fails with errno 13
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    has_profiles = settings['supports_profiles']
    browser_root = settings['browser_dir']

    # Without a usable profile the whole browser directory is searched
    search_root = browser_root
    if profile is not None:
        if _is_path(profile):
            search_root = profile
            browser_root = os.path.dirname(profile) if has_profiles else profile
        elif has_profiles:
            search_root = os.path.join(browser_root, profile)
        else:
            logger.error(f'{browser_name} does not support profiles')

    database_path = _newest(_find_files(search_root, 'Cookies', logger))
    if database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    decryptor = get_cookie_decryptor(browser_root, settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            # Text columns are fetched as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            columns = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in columns else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')

            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)

            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')

            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        except PermissionError as error:
            if compat_os_name == 'nt' and error.errno == 13:
                message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
                logger.error(message)
                raise DownloadError(message)  # force exit
            raise
        finally:
            if cursor is not None:
                cursor.connection.close()
340
341
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the chrome ``cookies`` table into a cookie object.

    The text columns arrive as bytes and are decoded here. When ``value`` is empty and
    ``encrypted_value`` is not, the value is obtained from ``decryptor.decrypt``.

    @returns  (is_encrypted, cookie); ``is_encrypted`` is only meaningful for its truthiness
              and ``cookie`` is None when decryption yields None
    """
    host_key, name, value, path = (
        column.decode() for column in (host_key, name, value, path))

    is_encrypted = not value and encrypted_value
    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    # In chrome, session cookies have expires_utc set to 0
    # In our cookie-store, cookies that do not expire should have expires set to None
    expires = expires_utc or None

    cookie = http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires, discard=False,
        comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
364
365
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class of the per-platform chrome cookie decryptors.

    Overview of the encryption schemes:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
                - also attempts empty password if decryption fails
            - v11: AES-CBC encrypted with an OS protected key (keyring)
                - also attempts empty password if decryption fails
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
        - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
            - KeyStorageLinux::CreateService
    """

    # Per-version counters of the cookies seen; subclasses replace this with their own dict
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one ``encrypted_value`` blob; concrete decryptors must override this."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 398
982ee69a 399
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor for the current platform.

    ``browser_root`` is only used on Windows, ``browser_keyring_name`` on Mac and Linux,
    and ``keyring`` only on Linux (every platform that is neither darwin nor win32/cygwin).
    """
    platform = sys.platform
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
982ee69a
MB
406
407
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10/v11 cookies as stored by Chromium based browsers on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._v10_key = self.derive_key(b'peanuts')
        self._empty_key = self.derive_key(b'')
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring

    @functools.cached_property
    def _v11_key(self):
        """Key derived from the keyring password, or None when no password is available.

        Looked up on first access only and cached afterwards.
        """
        password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        if password is None:
            return None
        return self.derive_key(password)

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """

        following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
        with an empty password. The failure detection is not the same as what chromium uses so the
        results won't be perfect

        References:
            - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
                - a bugfix to try an empty password as a fallback
        """
        # The first three bytes carry the version tag, the remainder is the ciphertext
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            keyring_key = self._v11_key
            if keyring_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc_multi(ciphertext, (keyring_key, self._empty_key), self._logger)

        self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
        self._cookie_counts['other'] += 1
        return None
457
458
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for cookies as stored by Chromium based browsers on macOS."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        keyring_password = _get_mac_keyring_password(browser_keyring_name, logger)
        # Without a keyring password v10 cookies cannot be decrypted
        self._v10_key = self.derive_key(keyring_password) if keyring_password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a v10 blob; any other blob is handed back unchanged."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)
489
490
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for cookies as stored by Chromium based browsers on Windows."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Decrypt one cookie blob.

        v10 blobs are laid out as: version tag, nonce, ciphertext, authentication tag.
        Anything else is passed to DPAPI. Returns None whenever decryption is not possible.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): the DPAPI helper is defined elsewhere; if it signals failure with
            # None, report an undecryptable cookie like the v10 branch instead of raising
            # AttributeError on None.decode()
            if plaintext is None:
                return None
            return plaintext.decode()
982ee69a
MB
526
527
def _extract_safari_cookies(profile, logger):
    """Load the safari cookie database and parse it into a cookie jar.

    @param profile  falsy to use the default locations, otherwise the path of a
                    binarycookies file (``~`` is expanded)
    @raises ValueError         on any platform other than darwin
    @raises FileNotFoundError  when no cookie database exists at the expected location(s)
    """
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    if profile:
        cookies_path = os.path.expanduser(profile)
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('custom safari cookies database not found')
    else:
        primary_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
        cookies_path = primary_path
        if not os.path.isfile(primary_path):
            logger.debug('Trying secondary cookie location')
            secondary_path = os.path.expanduser(
                '~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
            if not os.path.isfile(secondary_path):
                raise FileNotFoundError('could not find safari cookies database')
            cookies_path = secondary_path

    with open(cookies_path, 'rb') as database:
        raw_cookies = database.read()

    jar = parse_safari_cookies(raw_cookies, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
552
553
class ParserError(Exception):
    """Raised by DataParser when the binary input cannot be read as requested."""
556
557
class DataParser:
    """Sequential reader over a bytes buffer; ``cursor`` is the offset of the next unread byte."""

    def __init__(self, data, logger):
        # data is expected to be bytes/bytearray (slicing and ``find`` are used)
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor.

        Raises ParserError for a negative count or when the input is too short.
        """
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and raise ParserError unless they match."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless ``big_endian``)."""
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8 byte double (little endian unless ``big_endian``)."""
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        """Read up to the next NUL byte and return the decoded text (terminator consumed).

        Raises ParserError when no terminator is left in the input. A UnicodeDecodeError
        from decoding propagates with the cursor already moved past the terminator.
        """
        start = self.cursor
        # Locate the terminator in one pass instead of reading byte by byte
        terminator = self._data.find(b'\x00', start)
        if terminator < 0:
            # Mirror a byte-wise read: everything up to the end has been consumed
            self.cursor = len(self._data)
            raise ParserError('reached end of input')
        self.cursor = terminator + 1
        return self._data[start:terminator].decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes, logging them at debug level; negative counts are an error."""
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor ends up at ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the input."""
        self.skip_to(len(self._data), description)
607
608
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc)
    moment = mac_epoch + dt.timedelta(seconds=timestamp)
    return int(moment.timestamp())
982ee69a
MB
611
612
def _parse_safari_cookies_header(data, logger):
    """Parse the database header.

    @returns  (page_sizes, offset) where ``page_sizes`` lists the size of every page and
              ``offset`` is the number of header bytes consumed
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
619
620
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of the database and add every cookie record in it to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    cookie_count = parser.read_uint()
    offsets = [parser.read_uint() for _ in range(cookie_count)]
    if cookie_count == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{cookie_count: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record parser works on its own view; afterwards step over the record here
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
639
640
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of ``data`` and add it to ``jar``.

    @returns  the record size read from the record itself; a record whose strings are not
              valid UTF-8 is skipped with a warning but its size is still returned
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    # Offsets of the four NUL terminated strings, in the order they are stored in the header
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    strings = []
    try:
        for offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
681
682
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a safari binarycookies file into ``jar`` (a new
    YoutubeDLCookieJar when none is given) and return the jar.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
698
699
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments told apart by _get_linux_desktop_environment.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    OTHER = auto()  # nothing recognised
    CINNAMON = auto()
    DEEPIN = auto()
    GNOME = auto()
    KDE3 = auto()
    KDE4 = auto()
    KDE5 = auto()
    KDE6 = auto()
    PANTHEON = auto()
    UKUI = auto()
    UNITY = auto()
    XFCE = auto()
    LXQT = auto()
982ee69a
MB
718
719
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends that can be selected on Linux.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()  # KDE4
    KWALLET5 = auto()
    KWALLET6 = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()
f59f5ef8
MB
730
731
# Names of all _LinuxKeyring members (a keys view over the enum's member mapping)
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
733
734
def _get_linux_desktop_environment(env, logger):
    """
    Detect the desktop environment from environment variables.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment

    The checks are tried in this order, each falling through to the next when it does not
    produce a result:
        1. every colon separated entry of XDG_CURRENT_DESKTOP, in order
        2. DESKTOP_SESSION
        3. the legacy GNOME_DESKTOP_SESSION_ID / KDE_FULL_SESSION variables

    @param env     mapping of environment variables (e.g. os.environ)
    @param logger  logger with an ``info`` method, used to report unknown values
    @returns       a _LinuxDesktopEnvironment member (OTHER when nothing matches)
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        # Entries that map to an environment without any further checks
        simple_entries = {
            'Deepin': _LinuxDesktopEnvironment.DEEPIN,
            'GNOME': _LinuxDesktopEnvironment.GNOME,
            'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
            'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
            'XFCE': _LinuxDesktopEnvironment.XFCE,
            'UKUI': _LinuxDesktopEnvironment.UKUI,
            'LXQt': _LinuxDesktopEnvironment.LXQT,
        }
        # The variable may hold several values (e.g. "ubuntu:GNOME"); the first known one wins
        for entry in map(str.strip, xdg_current_desktop.split(':')):
            if entry == 'Unity':
                if desktop_session is not None and 'gnome-fallback' in desktop_session:
                    return _LinuxDesktopEnvironment.GNOME
                return _LinuxDesktopEnvironment.UNITY
            elif entry == 'KDE':
                kde_version = env.get('KDE_SESSION_VERSION', None)
                if kde_version == '5':
                    return _LinuxDesktopEnvironment.KDE5
                elif kde_version == '6':
                    return _LinuxDesktopEnvironment.KDE6
                elif kde_version == '4':
                    return _LinuxDesktopEnvironment.KDE4
                else:
                    logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
                    return _LinuxDesktopEnvironment.KDE4
            elif entry in simple_entries:
                return simple_entries[entry]
        logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')

    if desktop_session is not None:
        if desktop_session == 'deepin':
            return _LinuxDesktopEnvironment.DEEPIN
        elif desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif desktop_session in ('kde4', 'kde-plasma'):
            return _LinuxDesktopEnvironment.KDE4
        elif desktop_session == 'kde':
            # 'kde' alone is ambiguous; KDE_SESSION_VERSION is only set from KDE4 onwards
            if 'KDE_SESSION_VERSION' in env:
                return _LinuxDesktopEnvironment.KDE4
            else:
                return _LinuxDesktopEnvironment.KDE3
        elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
            return _LinuxDesktopEnvironment.XFCE
        elif desktop_session == 'ukui':
            return _LinuxDesktopEnvironment.UKUI
        else:
            logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')

    # Fall back on some older environment variables
    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        if 'KDE_SESSION_VERSION' in env:
            return _LinuxDesktopEnvironment.KDE4
        else:
            return _LinuxDesktopEnvironment.KDE3
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
806
807
def _choose_linux_keyring(logger):
    """
    Select the keyring backend matching the detected desktop environment.

    SelectBackend in [1]

    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
    """
    desktop_environment = _get_linux_desktop_environment(os.environ, logger)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')

    # Each KDE generation has its own KWallet service
    if desktop_environment == _LinuxDesktopEnvironment.KDE4:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.KDE5:
        return _LinuxKeyring.KWALLET5
    if desktop_environment == _LinuxDesktopEnvironment.KDE6:
        return _LinuxKeyring.KWALLET6

    plain_text_environments = (
        _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER,
    )
    if desktop_environment in plain_text_environments:
        return _LinuxKeyring.BASICTEXT

    # Every remaining desktop environment uses the GNOME keyring
    return _LinuxKeyring.GNOMEKEYRING
835
836
def _get_kwallet_network_wallet(keyring, logger):
    """ The name of the wallet used to store network passwords.

    Asks the kwalletd service belonging to `keyring` over dbus. If the keyring
    is not a KWallet variant, or the dbus call fails or raises, a warning is
    logged and the default wallet name 'kdewallet' is returned.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    fallback_wallet = 'kdewallet'
    try:
        # (keyring, dbus service name, dbus object path) for each KWallet generation
        known_services = (
            (_LinuxKeyring.KWALLET, 'org.kde.kwalletd', '/modules/kwalletd'),
            (_LinuxKeyring.KWALLET5, 'org.kde.kwalletd5', '/modules/kwalletd5'),
            (_LinuxKeyring.KWALLET6, 'org.kde.kwalletd6', '/modules/kwalletd6'),
        )
        for known_keyring, service_name, wallet_path in known_services:
            if keyring == known_keyring:
                break
        else:
            raise ValueError(keyring)

        dbus_command = [
            'dbus-send', '--session', '--print-reply=literal',
            f'--dest={service_name}',
            wallet_path,
            'org.kde.KWallet.networkWallet',
        ]
        stdout, _, returncode = Popen.run(
            dbus_command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if not returncode:
            logger.debug(f'NetworkWallet = "{stdout.strip()}"')
            return stdout.strip()

        logger.warning('failed to read NetworkWallet')
        return fallback_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return fallback_wallet
876
877
b38d4c94
MB
def _get_kwallet_password(browser_keyring_name, keyring, logger):
    """
    Read the browser's "Safe Storage" password from KWallet using `kwallet-query`.

    @param browser_keyring_name  browser name as used in the wallet entry/folder names
    @param keyring               the KWallet keyring variant in use (its `.name` is logged)
    @param logger                logger for debug/warning/error output
    @returns                     the password as bytes; b'' if kwallet-query is missing,
                                 fails, raises, or reports that the entry cannot be read
    """
    logger.debug(f'using kwallet-query to obtain password from {keyring.name}')

    if shutil.which('kwallet-query') is None:
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(keyring, logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet,
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this was identified as a bug later and fixed in
                # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
                # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
                return b''
            else:
                logger.debug('password found')
                return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
919
920
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Fetch the browser's "Safe Storage" secret from the default secretstorage collection.

    Returns b'' (after logging an error) if secretstorage is unavailable or
    no item with the expected label exists.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        matching_item = next(
            (entry for entry in collection.get_all_items()
             if entry.get_label() == f'{browser_keyring_name} Safe Storage'),
            None)
        if matching_item is not None:
            return matching_item.get_secret()
        logger.error('failed to read from keyring')
        return b''
f59f5ef8
MB
936
937
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Obtain the browser's cookie-encryption password from the chosen Linux keyring.

    @param browser_keyring_name  browser name as used in the keyring entry names
    @param keyring               name of a `_LinuxKeyring` member, or a falsy value to auto-detect
    @param logger                logger for debug/warning/error output
    @returns                     the password as bytes, or None for the BASICTEXT keyring
    @raises KeyError             if `keyring` is not the name of a `_LinuxKeyring` member
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
        return _get_kwallet_password(browser_keyring_name, keyring, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    # Raised explicitly instead of `assert False`: assert statements are removed under
    # `python -O`, which would turn an unhandled keyring into a silent `None` result
    raise AssertionError(f'Unknown keyring {keyring}')
956
957
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the browser's "Safe Storage" password via `security find-generic-password`.

    Returns the password as bytes, or None (after logging a warning) if the
    command exits with a non-zero status or raises.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        stdout, _, returncode = Popen.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if not returncode:
            return stdout.rstrip(b'\n')
        logger.warning('find-generic-password failed')
        return None
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
974
975
def _get_windows_v10_key(browser_root, logger):
    """
    Load and decrypt the cookie encryption key stored in the browser's `Local State` file.

    Returns None (after logging an error) if the file or the key cannot be
    found or the key lacks the expected prefix; otherwise returns whatever
    `_decrypt_windows_dpapi` yields for the key.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
    """
    local_state_path = _newest(_find_files(browser_root, 'Local State', logger))
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        # kOsCryptEncryptedKeyPrefName in [1]
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    raw_key = base64.b64decode(encoded_key)
    # kDPAPIKeyPrefix in [1]
    dpapi_prefix = b'DPAPI'
    if raw_key.startswith(dpapi_prefix):
        return _decrypt_windows_dpapi(raw_key[len(dpapi_prefix):], logger)

    logger.error('invalid key')
    return None
1001
1002
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a `key_length`-byte key from `password` and `salt` using PBKDF2-HMAC-SHA1"""
    return pbkdf2_hmac(
        'sha1',
        password,
        salt,
        iterations,
        dklen=key_length,
    )
1005
1006
b38d4c94
MB
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
    """
    Decrypt `ciphertext` with AES-CBC, trying each of `keys` in turn.

    Returns the first result that decodes as UTF-8; if none does, logs a
    warning (once) and returns None.
    """
    for candidate_key in keys:
        decrypted = aes_cbc_decrypt_bytes(ciphertext, candidate_key, initialization_vector)
        unpadded = unpad_pkcs7(decrypted)
        try:
            return unpadded.decode()
        except UnicodeDecodeError:
            # undecodable output: try the next key
            continue
    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
    return None
982ee69a
MB
1016
1017
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and verify `ciphertext` with AES-GCM and decode the result as text.

    Returns None (after logging a warning once) if verification raises
    ValueError or the plaintext cannot be decoded.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        decoded = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        decoded = None
    return decoded
1030
1031
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling CryptUnprotectData from crypt32 through ctypes.

    Returns the decrypted bytes, or None (after logging a warning once) if the
    call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    # imported here rather than at module level; `ctypes.windll` is used below
    import ctypes
    import ctypes.wintypes

    # length + pointer pair used for both the input and the output of CryptUnprotectData
    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    buffer = ctypes.create_string_buffer(ciphertext)
    blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
    blob_out = DATA_BLOB()
    ret = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(blob_in),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(blob_out),  # pDataOut
    )
    if not ret:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before the output buffer is freed
    result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
    ctypes.windll.kernel32.LocalFree(blob_out.pbData)
    return result
1064
1065
def _config_home():
    """
    Return the user's XDG configuration directory.

    Uses $XDG_CONFIG_HOME when it is set to a non-empty value, otherwise
    `~/.config`. An empty value is treated the same as an unset one (as the
    XDG Base Directory Specification requires) so that callers never end up
    joining browser paths onto an empty string.
    """
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
1068
1069
def _open_database_copy(database_path, tmpdir):
    """
    Copy the database to `tmpdir` and return a cursor on the copy.

    The connection stays open; it remains reachable through the returned
    cursor's `connection` attribute.
    """
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
1076
1077
def _get_column_names(cursor, table_name):
    """
    Return the column names of `table_name` as a list of str.

    The second field of each `PRAGMA table_info` row is decoded, so the
    cursor must be returning that field as bytes.
    """
    rows = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    column_names = []
    for row in rows:
        column_names.append(row[1].decode())
    return column_names
982ee69a
MB
1081
1082
cbed249a
SS
def _newest(files):
    """Return the path in `files` with the latest modification time, or None if `files` is empty"""
    def modification_time(path):
        return os.lstat(path).st_mtime

    return max(files, key=modification_time, default=None)
1085
1086
def _find_files(root, filename, logger):
    """
    Walk `root` and yield the path of every file named exactly `filename`.

    The running count of examined files is reported through a progress bar.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, names in os.walk(root):
            for name in names:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if name != filename:
                    continue
                yield os.path.join(directory, name)
982ee69a
MB
1097
1098
def _merge_cookie_jars(jars):
    """
    Combine the cookies of all `jars` into a new YoutubeDLCookieJar.

    The result takes the filename of the last jar that has one.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        source_filename = source_jar.filename
        if source_filename is not None:
            merged.filename = source_filename
    return merged
1107
1108
def _is_path(value):
    """Return True if `value` contains a path separator of the current platform"""
    # os.path.altsep is None on some platforms, so drop empty entries
    separators = filter(None, (os.path.sep, os.path.altsep))
    return any(separator in value for separator in separators)
982ee69a
MB
1111
1112
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification and normalise its profile.

    @returns     (browser_name, profile, keyring, container); `profile` is
                 replaced by its expanded form when that form looks like a path
    @raises ValueError  if the browser or the keyring is not supported
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring not in (None, *SUPPORTED_KEYRINGS):
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        expanded_profile = expand_path(profile)
        # plain profile names are passed through untouched
        if _is_path(expanded_profile):
            profile = expanded_profile
    return browser_name, profile, keyring, container
8817a80d
SS
1121
1122
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """
    More lenient version of http.cookies.SimpleCookie

    When loading from a string, an entry that cannot be parsed is skipped
    and parsing continues with the next one.
    """
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    # Lower-cased keys that are treated as attributes of the preceding cookie
    _RESERVED = {
        'expires',
        'path',
        'comment',
        'domain',
        'max-age',
        'secure',
        'httponly',
        'version',
        'samesite',
    }

    # Reserved keys that are allowed to appear without a value
    _FLAGS = {'secure', 'httponly'}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r'''
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [''' + _LEGAL_VALUE_CHARS + r''']*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        ''', re.ASCII | re.VERBOSE)

    def load(self, data):
        """
        Load cookies from `data`.

        Non-string input is handed to SimpleCookie.load unchanged. Strings are
        scanned with `_COOKIE_PATTERN`, one key/value match at a time.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # The cookie that subsequent attributes get attached to;
        # None means following attributes are skipped until a new cookie starts
        morsel = None
        for match in self._COOKIE_PATTERN.finditer(data):
            if match.group('bad'):
                # invalid value: skip it and stop accepting attributes
                morsel = None
                continue

            key, value = match.group('key', 'val')

            # a leading '$' marks the key as an attribute
            is_attribute = False
            if key.startswith('$'):
                key = key[1:]
                is_attribute = True

            lower_key = key.lower()
            if lower_key in self._RESERVED:
                if morsel is None:
                    # no cookie to attach this attribute to
                    continue

                if value is None:
                    if lower_key not in self._FLAGS:
                        # only flags may omit their value
                        morsel = None
                        continue
                    value = True
                else:
                    value, _ = self.value_decode(value)

                morsel[key] = value

            elif is_attribute:
                # '$'-prefixed key that is not a reserved attribute
                morsel = None

            elif value is not None:
                # an ordinary cookie: reuse the stored morsel for this key, if any
                morsel = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)
                morsel.set(key, real_value, coded_value)
                self[key] = morsel

            else:
                # non-reserved key without a value
                morsel = None
b87e01c1 1212
1213
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar variant whose cookie file may be a path or an already open text stream.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """
        @param filename  path-like object or file-like object to load from / save to by default;
                         path-like values are converted with os.fspath
        """
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Return the cookie file spelling ('TRUE'/'FALSE') of a condition"""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text stream for `file`.

        Paths are opened (and closed again) here; any other object is assumed
        to be a stream, is yielded as-is and is left open.

        @param file   path-like object or file-like object
        @param write  whether existing content should be discarded for writing
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() leaves the stream position where it is. Without rewinding,
                # a stream that was read before (e.g. by `load`) would be written to
                # at its old offset, padding the start of the output with NUL characters
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard, ignore_expires):
        """Write one tab-separated line per cookie to `f`, honouring the ignore_* flags"""
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('{}\n'.format('\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value,
            ))))

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        @param filename  target path or stream; defaults to `self.filename`
        @raises ValueError  if neither `filename` nor `self.filename` is set
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        # NOTE: this modifies the cookies held in this jar, not only the written output
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Load cookies from a file.

        Entries that are malformed are skipped with a warning; a file that
        looks like JSON is rejected.

        @param filename  source path or stream; defaults to `self.filename`
        @raises ValueError                if neither `filename` nor `self.filename` is set
        @raises http.cookiejar.LoadError  if an invalid line starts with `[`, `{` or `"`
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # strip the HttpOnly marker so that the entry is parsed like any other
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}')
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}')
            return line

        # Collect the validated lines in memory and let the base class parse those
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # the appended space keeps the index valid for blank lines
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        cookie_req = urllib.request.Request(normalize_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')

    def get_cookies_for_url(self, url):
        """Generate a list of Cookie objects for a given url"""
        # Policy `_now` attribute must be set before calling `_cookies_for_request`
        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
        self._policy._now = self._now = int(time.time())
        return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url))))

    def clear(self, *args, **kwargs):
        """Clear cookies like the base class does, but ignore a KeyError it raises"""
        with contextlib.suppress(KeyError):
            return super().clear(*args, **kwargs)