]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[ie/bbc] Fix and extend extraction (#9705)
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
14f25df2 1import base64
b87e01c1 2import collections
f59f5ef8 3import contextlib
c305a25c 4import datetime as dt
cbed249a 5import glob
54007a45 6import http.cookiejar
8817a80d 7import http.cookies
b87e01c1 8import io
982ee69a
MB
9import json
10import os
9bd13fe5 11import re
982ee69a 12import shutil
982ee69a
MB
13import struct
14import subprocess
15import sys
16import tempfile
2e4585da 17import time
b87e01c1 18import urllib.request
f59f5ef8 19from enum import Enum, auto
982ee69a
MB
20from hashlib import pbkdf2_hmac
21
1d3586d0 22from .aes import (
23 aes_cbc_decrypt_bytes,
24 aes_gcm_decrypt_and_verify_bytes,
25 unpad_pkcs7,
26)
2792092a
SS
27from .compat import functools # isort: split
28from .compat import compat_os_name
9b8ee23b 29from .dependencies import (
30 _SECRETSTORAGE_UNAVAILABLE_REASON,
31 secretstorage,
32 sqlite3,
33)
97ec5bc5 34from .minicurses import MultilinePrinter, QuietMultilinePrinter
d2c8aadf 35from .utils import (
2792092a 36 DownloadError,
d2c8aadf 37 Popen,
d2c8aadf 38 error_to_str,
39 expand_path,
22df97f9 40 is_path_like,
b87e01c1 41 sanitize_url,
42 str_or_none,
d2c8aadf 43 try_call,
b87e01c1 44 write_string,
d2c8aadf 45)
1b392f90 46from .utils._utils import _YDLLogger
4bf91228 47from .utils.networking import normalize_url
982ee69a 48
982ee69a
MB
# Browser names routed through the Chromium cookie extraction path
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name that cookie extraction accepts
SUPPORTED_BROWSERS = {*CHROMIUM_BASED_BROWSERS, 'firefox', 'safari'}
51
52
class YDLLogger(_YDLLogger):
    """Logger handed to the cookie extractors, with an optional terminal progress bar."""

    def warning(self, message, only_once=False):  # compat
        """Forward to the parent ``warning``, passing ``only_once`` as its ``once`` keyword."""
        return super().warning(message, once=only_once)

    class ProgressBar(MultilinePrinter):
        """Printer that drops updates arriving less than ``_DELAY`` seconds after the last one."""
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            """Write ``message`` (prefixed with ``[Cookies]``) on line 0, subject to throttling."""
            if time.time() - self._timer <= self._DELAY:
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        # Do not print to files/pipes, loggers, or when --no-progress is used
        ydl = self._ydl
        if not ydl or ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None
        file = ydl._out_files.error
        try:
            is_terminal = file.isatty()
        except BaseException:
            # Any failure while probing the stream means: no progress bar
            return None
        if not is_terminal:
            return None
        return self.ProgressBar(file, preserve_output=False)
97ec5bc5 77
78
79def _create_progress_bar(logger):
80 if hasattr(logger, 'progress_bar'):
81 printer = logger.progress_bar()
82 if printer:
83 return printer
84 printer = QuietMultilinePrinter()
85 printer.print = lambda _: None
86 return printer
87
982ee69a
MB
88
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file             Cookie file to read, or None to skip it. Path-like values are
                                   expanded and only loaded when readable; anything else is
                                   always loaded.
    @param browser_specification   Arguments for ``_parse_browser_specification``, or None to
                                   skip browser extraction.
    @param ydl                     Object wrapped in the ``YDLLogger`` used during extraction.
    @returns                       The result of merging the collected jars (browser jar first).
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
        jars.append(browser_jar)

    if cookie_file is not None:
        is_filename = is_path_like(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # An unreadable path still yields an (empty) jar bound to that file
        if not is_filename or os.access(cookie_file, os.R_OK):
            file_jar.load()
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
107
108
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch to the extractor matching ``browser_name`` and return its cookie jar.

    ``container`` is only forwarded to the firefox extractor and ``keyring`` only to the
    Chromium one. Raises ValueError for a browser name that none of the extractors handle.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
118
119
def _extract_firefox_cookies(profile, container, logger):
    """Read cookies from the newest Firefox ``cookies.sqlite`` found for ``profile``.

    @param profile    None (search the default Firefox directories), a path, or a name that is
                      joined onto each default Firefox directory.
    @param container  None (all cookies), 'none' (only cookies outside any container), or a
                      container label looked up in ``containers.json`` next to the database.
                      A label matches an identity's ``name``, its whole ``l10nID``
                      (``userContext<Label>.label``) or just the ``<Label>`` part of it.
    @param logger     Logger used for info/debug/warning output and the progress bar.
    @returns          A YoutubeDLCookieJar (empty when sqlite3 is unavailable).
    @raises FileNotFoundError  When no cookie database, or no readable containers.json, is found.
    @raises ValueError         When the requested container cannot be resolved to an integer ID.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_roots = list(_firefox_browser_dirs())
    elif _is_path(profile):
        search_roots = [profile]
    else:
        search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
    search_root = ', '.join(map(repr, search_roots))

    cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        with open(containers_path, encoding='utf8') as containers:
            identities = json.load(containers).get('identities', [])

        def container_labels(context):
            # try_call swallows a missing/non-string l10nID; fullmatch gives None on no match
            l10n_match = try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']))
            if l10n_match is None:
                return (context.get('name'),)
            # The full l10nID stays accepted for backward compatibility;
            # the captured label is what the regex group was written for
            return (context.get('name'), l10n_match.group(), l10n_match.group(1))

        container_id = next((
            context.get('userContextId') for context in identities
            if container in container_labels(context)), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                # Two patterns: the ID at the very end of originAttributes, or followed by '&'
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # Single quotes: SQLite treats double-quoted text as an identifier first and only
                # falls back to a string literal as a legacy quirk that builds may disable
                cursor.execute(
                    "SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,'userContextId=')")
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            # The cursor only exists if the database copy could be opened
            if cursor is not None:
                cursor.connection.close()
187
188
cbed249a 189def _firefox_browser_dirs():
dec30912 190 if sys.platform in ('cygwin', 'win32'):
cbed249a
SS
191 yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
192
982ee69a 193 elif sys.platform == 'darwin':
cbed249a
SS
194 yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
195
196 else:
2ab2651a
DM
197 yield from map(os.path.expanduser, (
198 '~/.mozilla/firefox',
199 '~/snap/firefox/common/.mozilla/firefox',
200 '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
201 ))
cbed249a
SS
202
203
204def _firefox_cookie_dbs(roots):
205 for root in map(os.path.abspath, roots):
206 for pattern in ('', '*/', 'Profiles/*/'):
207 yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
982ee69a
MB
208
209
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a Chromium-based browser.

    @param browser_name  One of the keys of the tables below; any other name raises KeyError.
    @returns             dict with 'browser_dir', 'keyring_name' and 'supports_profiles'.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    if sys.platform in ('cygwin', 'win32'):
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        base, relative = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
        }[browser_name]

    elif sys.platform == 'darwin':
        base = os.path.expanduser('~/Library/Application Support')
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]

    else:
        base = _config_home()
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]

    browser_dir = os.path.join(base, relative)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = sys.platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }[browser_name]

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles,
    }
264
265
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt the cookies of a Chromium-based browser.

    @param browser_name  Key understood by ``_get_chromium_based_browser_settings``.
    @param profile       None (search the whole browser directory), a path, or a profile name
                         joined onto the browser directory when the browser supports profiles.
    @param keyring       Forwarded to ``get_cookie_decryptor``.
    @param logger        Logger used for output and the progress bar.
    @returns             A YoutubeDLCookieJar (empty when sqlite3 is unavailable).
    @raises FileNotFoundError  When no 'Cookies' database is found under the search root.
    @raises DownloadError      On Windows, when copying the database fails with errno 13.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = settings['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # With profile support the given path is one profile, so its parent is the browser dir
        settings['browser_dir'] = os.path.dirname(profile) if settings['supports_profiles'] else profile
    elif settings['supports_profiles']:
        search_root = os.path.join(settings['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = settings['browser_dir']

    cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(settings['browser_dir'], settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Rows come back as bytes; _process_chrome_cookie decodes the text columns
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                row_count = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{row_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = {**decryptor._cookie_counts, 'unencrypted': unencrypted_cookies}
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        except PermissionError as error:
            if compat_os_name != 'nt' or error.errno != 13:
                raise
            message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
            logger.error(message)
            raise DownloadError(message)  # force exit
        finally:
            if cursor is not None:
                cursor.connection.close()
336
337
97ec5bc5 338def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
0f06bcd7 339 host_key = host_key.decode()
340 name = name.decode()
341 value = value.decode()
342 path = path.decode()
97ec5bc5 343 is_encrypted = not value and encrypted_value
344
345 if is_encrypted:
346 value = decryptor.decrypt(encrypted_value)
347 if value is None:
348 return is_encrypted, None
349
f1f15897
SL
350 # In chrome, session cookies have expires_utc set to 0
351 # In our cookie-store, cookies that do not expire should have expires set to None
352 if not expires_utc:
353 expires_utc = None
354
ac668111 355 return is_encrypted, http.cookiejar.Cookie(
97ec5bc5 356 version=0, name=name, value=value, port=None, port_specified=False,
357 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
358 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
359 comment=None, comment_url=None, rest={})
360
361
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class of the per-platform cookie decryptors.

    Overview of the formats:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
                - also attempts empty password if decryption fails
            - v11: AES-CBC encrypted with an OS protected key (keyring)
                - also attempts empty password if decryption fails
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
        - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
            - KeyStorageLinux::CreateService
    """

    # Per-version tally of processed cookies; empty on the base class
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one ``encrypted_value``; concrete decryptors must override this."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 394
982ee69a 395
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Instantiate the decryptor for the running platform.

    ``browser_root`` is only used on Windows, ``browser_keyring_name`` on macOS and in the
    fallback (Linux) case, and ``keyring`` only in the fallback case.
    """
    platform = sys.platform
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
982ee69a
MB
402
403
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the 'v10' and 'v11' cookie formats handled on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        # Both fixed keys are derived up front; the keyring-backed v11 key is resolved lazily
        self._v10_key = self.derive_key(b'peanuts')
        self._empty_key = self.derive_key(b'')

    @functools.cached_property
    def _v11_key(self):
        """Key derived from the keyring password, or None when no password was obtained."""
        password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        if password is None:
            return None
        return self.derive_key(password)

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password`` (PBKDF2-SHA1, 1 iteration)."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """
        Decrypt a cookie value whose first three bytes name its version.

        following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
        with an empty password. The failure detection is not the same as what chromium uses so the
        results won't be perfect

        Returns None for an unknown version or when the v11 key is unavailable.

        References:
            - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
                - a bugfix to try an empty password as a fallback
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            candidate_keys = (self._v10_key, self._empty_key)

        elif version == b'v11':
            self._cookie_counts['v11'] += 1
            v11_key = self._v11_key
            if v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            candidate_keys = (v11_key, self._empty_key)

        else:
            self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
            self._cookie_counts['other'] += 1
            return None

        return _decrypt_aes_cbc_multi(ciphertext, candidate_keys, self._logger)
453
454
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the 'v10' format handled on macOS; other values pass through unchanged."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        # No password means v10 cookies cannot be decrypted; decrypt() warns about it
        if password is None:
            self._v10_key = None
        else:
            self._v10_key = self.derive_key(password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password`` (PBKDF2-SHA1, 1003 iterations)."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a 'v10' value; return anything else as-is. None when the key is missing."""
        if encrypted_value[:3] != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc_multi(encrypted_value[3:], (self._v10_key,), self._logger)
485
486
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for Windows: AES-GCM for 'v10' values, DPAPI for everything else."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value.

        @param encrypted_value  Raw bytes; the first three bytes select the scheme.
        @returns                The decrypted value, or None when it cannot be decrypted
                                (callers treat None as a failed cookie).
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the version prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): the DPAPI helper is defined outside this view; if it reports failure
            # with None (as every other failure path of decrypt() does), propagate that instead of
            # raising AttributeError on None.decode()
            return None if plaintext is None else plaintext.decode()
982ee69a
MB
522
523
524def _extract_safari_cookies(profile, logger):
982ee69a 525 if sys.platform != 'darwin':
86e5f3ed 526 raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a 527
a58182b7
NV
528 if profile:
529 cookies_path = os.path.expanduser(profile)
530 if not os.path.isfile(cookies_path):
531 raise FileNotFoundError('custom safari cookies database not found')
532
533 else:
534 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
982ee69a 535
1f7db853 536 if not os.path.isfile(cookies_path):
a58182b7
NV
537 logger.debug('Trying secondary cookie location')
538 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
539 if not os.path.isfile(cookies_path):
540 raise FileNotFoundError('could not find safari cookies database')
982ee69a
MB
541
542 with open(cookies_path, 'rb') as f:
543 cookies_data = f.read()
544
545 jar = parse_safari_cookies(cookies_data, logger=logger)
86e5f3ed 546 logger.info(f'Extracted {len(jar)} cookies from safari')
982ee69a
MB
547 return jar
548
549
class ParserError(Exception):
    """Raised by DataParser when the input cannot be read as requested."""
552
553
class DataParser:
    """Sequential reader over an in-memory bytes buffer.

    ``cursor`` is the offset of the next unread byte. Every read advances it; reads that would
    run past the end of the buffer raise ParserError.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes; ParserError on a negative or out-of-range count."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and raise ParserError unless they match."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless ``big_endian``)."""
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little-endian unless ``big_endian``)."""
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        """Read up to and including the next NUL byte; return the preceding bytes decoded.

        The terminator is located with a single search instead of one ``read_bytes(1)`` call
        per byte. Without a terminator the rest of the input is consumed and ParserError is
        raised. UnicodeDecodeError from decoding propagates with the cursor already past
        the NUL.
        """
        terminator = self._data.find(b'\x00', self.cursor)
        if terminator < 0:
            self.cursor = len(self._data)
            raise ParserError('reached end of input')
        raw = self._data[self.cursor:terminator]
        self.cursor = terminator + 1  # step over the NUL
        return raw.decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes, logging them at debug level; a zero count is a no-op."""
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so the cursor lands on ``offset``; moving backwards raises ParserError."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left of the input."""
        self.skip_to(len(self._data), description)
603
604
605def _mac_absolute_time_to_posix(timestamp):
c305a25c 606 return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=timestamp)).timestamp())
982ee69a
MB
607
608
def _parse_safari_cookies_header(data, logger):
    """Parse the file header: the b'cook' signature, a page count and one size per page.

    All integers in the header are read big-endian.

    @returns  ``(page_sizes, offset)`` where ``offset`` is the number of header bytes consumed.
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
615
616
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of the cookie store and add each of its records to ``jar``.

    A page starts with a 4-byte signature, a record count and one offset per record
    (offsets are relative to the start of the page).
    """
    page = DataParser(data, logger)
    page.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = page.read_uint()
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return
    record_offsets = [page.read_uint() for _ in range(number_of_cookies)]

    page.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            page.skip_to(offset, 'space between records')
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            # Advance past the record the helper just parsed from its own slice
            page.read_bytes(consumed)
    page.skip_to_end('space in between pages')
635
636
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of ``data`` and add it to ``jar``.

    A record whose strings are not valid UTF-8 is skipped with a warning.

    @returns  The record size read from the record itself, whether or not a cookie was added.
    """
    record = DataParser(data, logger)
    record_size = record.read_uint()
    record.skip(4, 'unknown record field 1')
    is_secure = bool(record.read_uint() & 0x0001)  # lowest flag bit
    record.skip(4, 'unknown record field 2')
    # Offsets of the four NUL-terminated strings, relative to the start of the record
    domain_offset = record.read_uint()
    name_offset = record.read_uint()
    path_offset = record.read_uint()
    value_offset = record.read_uint()
    record.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(record.read_double())
    _creation_date = _mac_absolute_time_to_posix(record.read_double())  # noqa: F841

    try:
        strings = []
        for string_offset in (domain_offset, name_offset, path_offset, value_offset):
            record.skip_to(string_offset)
            strings.append(record.read_cstring())
        domain, name, path, value = strings
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    record.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
677
678
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari binary cookie store into ``jar`` (a new jar when None).

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page_data = body.read_bytes(size)
        _parse_safari_cookies_page(page_data, jar, logger)
    body.skip_to_end('footer')
    return jar
694
695
f59f5ef8
MB
696class _LinuxDesktopEnvironment(Enum):
697 """
698 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
699 DesktopEnvironment
700 """
701 OTHER = auto()
702 CINNAMON = auto()
b38d4c94 703 DEEPIN = auto()
f59f5ef8 704 GNOME = auto()
b38d4c94
MB
705 KDE3 = auto()
706 KDE4 = auto()
707 KDE5 = auto()
708 KDE6 = auto()
f59f5ef8 709 PANTHEON = auto()
b38d4c94 710 UKUI = auto()
f59f5ef8
MB
711 UNITY = auto()
712 XFCE = auto()
b38d4c94 713 LXQT = auto()
982ee69a
MB
714
715
f59f5ef8
MB
716class _LinuxKeyring(Enum):
717 """
b38d4c94 718 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
f59f5ef8
MB
719 SelectedLinuxBackend
720 """
2e023649 721 KWALLET = auto() # KDE4
b38d4c94
MB
722 KWALLET5 = auto()
723 KWALLET6 = auto()
2e023649 724 GNOMEKEYRING = auto()
725 BASICTEXT = auto()
f59f5ef8
MB
726
727
728SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
729
730
b38d4c94 731def _get_linux_desktop_environment(env, logger):
f59f5ef8
MB
732 """
733 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
734 GetDesktopEnvironment
735 """
736 xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
737 desktop_session = env.get('DESKTOP_SESSION', None)
738 if xdg_current_desktop is not None:
739 xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
740
741 if xdg_current_desktop == 'Unity':
742 if desktop_session is not None and 'gnome-fallback' in desktop_session:
743 return _LinuxDesktopEnvironment.GNOME
744 else:
745 return _LinuxDesktopEnvironment.UNITY
b38d4c94
MB
746 elif xdg_current_desktop == 'Deepin':
747 return _LinuxDesktopEnvironment.DEEPIN
f59f5ef8
MB
748 elif xdg_current_desktop == 'GNOME':
749 return _LinuxDesktopEnvironment.GNOME
750 elif xdg_current_desktop == 'X-Cinnamon':
751 return _LinuxDesktopEnvironment.CINNAMON
752 elif xdg_current_desktop == 'KDE':
b38d4c94
MB
753 kde_version = env.get('KDE_SESSION_VERSION', None)
754 if kde_version == '5':
755 return _LinuxDesktopEnvironment.KDE5
756 elif kde_version == '6':
757 return _LinuxDesktopEnvironment.KDE6
758 elif kde_version == '4':
759 return _LinuxDesktopEnvironment.KDE4
760 else:
761 logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
762 return _LinuxDesktopEnvironment.KDE4
f59f5ef8
MB
763 elif xdg_current_desktop == 'Pantheon':
764 return _LinuxDesktopEnvironment.PANTHEON
765 elif xdg_current_desktop == 'XFCE':
766 return _LinuxDesktopEnvironment.XFCE
b38d4c94
MB
767 elif xdg_current_desktop == 'UKUI':
768 return _LinuxDesktopEnvironment.UKUI
769 elif xdg_current_desktop == 'LXQt':
770 return _LinuxDesktopEnvironment.LXQT
771 else:
772 logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
773
f59f5ef8 774 elif desktop_session is not None:
b38d4c94
MB
775 if desktop_session == 'deepin':
776 return _LinuxDesktopEnvironment.DEEPIN
777 elif desktop_session in ('mate', 'gnome'):
f59f5ef8 778 return _LinuxDesktopEnvironment.GNOME
b38d4c94
MB
779 elif desktop_session in ('kde4', 'kde-plasma'):
780 return _LinuxDesktopEnvironment.KDE4
781 elif desktop_session == 'kde':
782 if 'KDE_SESSION_VERSION' in env:
783 return _LinuxDesktopEnvironment.KDE4
784 else:
785 return _LinuxDesktopEnvironment.KDE3
786 elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
f59f5ef8 787 return _LinuxDesktopEnvironment.XFCE
b38d4c94
MB
788 elif desktop_session == 'ukui':
789 return _LinuxDesktopEnvironment.UKUI
790 else:
791 logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
792
f59f5ef8
MB
793 else:
794 if 'GNOME_DESKTOP_SESSION_ID' in env:
795 return _LinuxDesktopEnvironment.GNOME
796 elif 'KDE_FULL_SESSION' in env:
b38d4c94
MB
797 if 'KDE_SESSION_VERSION' in env:
798 return _LinuxDesktopEnvironment.KDE4
799 else:
800 return _LinuxDesktopEnvironment.KDE3
fa8fd951 801 return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
802
803
def _choose_linux_keyring(logger):
    """
    Select the keyring backend that matches the detected desktop environment.

    SelectBackend in [1]

    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
    """
    detected = _get_linux_desktop_environment(os.environ, logger)
    logger.debug(f'detected desktop environment: {detected.name}')

    # Each KDE generation has its own KWallet flavour
    if detected == _LinuxDesktopEnvironment.KDE4:
        return _LinuxKeyring.KWALLET
    if detected == _LinuxDesktopEnvironment.KDE5:
        return _LinuxKeyring.KWALLET5
    if detected == _LinuxDesktopEnvironment.KDE6:
        return _LinuxKeyring.KWALLET6

    # These environments get the plain-text backend
    plaintext_environments = (
        _LinuxDesktopEnvironment.KDE3,
        _LinuxDesktopEnvironment.LXQT,
        _LinuxDesktopEnvironment.OTHER,
    )
    if detected in plaintext_environments:
        return _LinuxKeyring.BASICTEXT

    # Every remaining environment falls back to the GNOME keyring
    return _LinuxKeyring.GNOMEKEYRING
831
832
def _get_kwallet_network_wallet(keyring, logger):
    """ The name of the wallet used to store network passwords.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet

    Returns the wallet name reported by `dbus-send`, or 'kdewallet' when the
    call fails, raises, or `keyring` is not one of the KWallet keyrings.
    """
    fallback_wallet = 'kdewallet'
    try:
        # The dbus service name and object path depend on the KWallet generation
        if keyring == _LinuxKeyring.KWALLET:
            service_name, wallet_path = 'org.kde.kwalletd', '/modules/kwalletd'
        elif keyring == _LinuxKeyring.KWALLET5:
            service_name, wallet_path = 'org.kde.kwalletd5', '/modules/kwalletd5'
        elif keyring == _LinuxKeyring.KWALLET6:
            service_name, wallet_path = 'org.kde.kwalletd6', '/modules/kwalletd6'
        else:
            raise ValueError(keyring)

        command = [
            'dbus-send', '--session', '--print-reply=literal',
            f'--dest={service_name}',
            wallet_path,
            'org.kde.KWallet.networkWallet',
        ]
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return fallback_wallet

        wallet_name = stdout.strip()
        logger.debug(f'NetworkWallet = "{wallet_name}"')
        return wallet_name
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return fallback_wallet
872
873
b38d4c94
MB
874def _get_kwallet_password(browser_keyring_name, keyring, logger):
875 logger.debug(f'using kwallet-query to obtain password from {keyring.name}')
f59f5ef8
MB
876
877 if shutil.which('kwallet-query') is None:
878 logger.error('kwallet-query command not found. KWallet and kwallet-query '
879 'must be installed to read from KWallet. kwallet-query should be'
880 'included in the kwallet package for your distribution')
881 return b''
882
b38d4c94 883 network_wallet = _get_kwallet_network_wallet(keyring, logger)
f59f5ef8
MB
884
885 try:
f0c9fb96 886 stdout, _, returncode = Popen.run([
f59f5ef8 887 'kwallet-query',
86e5f3ed 888 '--read-password', f'{browser_keyring_name} Safe Storage',
889 '--folder', f'{browser_keyring_name} Keys',
f59f5ef8
MB
890 network_wallet
891 ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
892
f0c9fb96 893 if returncode:
894 logger.error(f'kwallet-query failed with return code {returncode}. '
895 'Please consult the kwallet-query man page for details')
f59f5ef8
MB
896 return b''
897 else:
898 if stdout.lower().startswith(b'failed to read'):
899 logger.debug('failed to read password from kwallet. Using empty string instead')
900 # this sometimes occurs in KDE because chrome does not check hasEntry and instead
901 # just tries to read the value (which kwallet returns "") whereas kwallet-query
902 # checks hasEntry. To verify this:
903 # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
904 # while starting chrome.
b38d4c94
MB
905 # this was identified as a bug later and fixed in
906 # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
907 # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
f59f5ef8
MB
908 return b''
909 else:
910 logger.debug('password found')
f0c9fb96 911 return stdout.rstrip(b'\n')
a44ca5a4 912 except Exception as e:
913 logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
f59f5ef8
MB
914 return b''
915
916
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Look up the `<browser_keyring_name> Safe Storage` secret through secretstorage.

    Returns the secret of the first item in the default collection whose label
    matches, or b'' when secretstorage is unavailable or no item matches.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    wanted_label = f'{browser_keyring_name} Safe Storage'
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for entry in collection.get_all_items():
            if entry.get_label() == wanted_label:
                return entry.get_secret()

        # Reached only when no item carried the expected label
        logger.error('failed to read from keyring')
        return b''
933
934
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Fetch the browser's keyring password from the requested (or auto-detected) backend.

    `keyring` is a `_LinuxKeyring` member name; a falsy value triggers detection
    through `_choose_linux_keyring`. Returns None for the basic-text backend.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        selected = _LinuxKeyring[keyring]
    else:
        selected = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {selected.name}')

    kwallet_keyrings = (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6)
    if selected in kwallet_keyrings:
        return _get_kwallet_password(browser_keyring_name, selected, logger)
    if selected == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if selected == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {selected}'
953
954
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Ask the `security` command line tool for the browser's "Safe Storage" password.

    Returns the password as bytes (trailing newline removed), or None when the
    command exits with a non-zero code or raises.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        stdout, _, returncode = Popen.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if not returncode:
            return stdout.rstrip(b'\n')
        logger.warning('find-generic-password failed')
        return None
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
971
972
def _get_windows_v10_key(browser_root, logger):
    """
    Extract and decrypt the cookie encryption key stored in the newest `Local State` file.

    Returns the result of `_decrypt_windows_dpapi` on success, or None when the
    file is missing, has no encrypted key, or the key lacks the DPAPI prefix.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
    """
    local_state_path = _newest(_find_files(browser_root, 'Local State', logger))
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        # kOsCryptEncryptedKeyPrefName in [1]
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    wrapped_key = base64.b64decode(encoded_key)
    # kDPAPIKeyPrefix in [1]
    dpapi_prefix = b'DPAPI'
    if wrapped_key.startswith(dpapi_prefix):
        # Only the bytes following the prefix are DPAPI ciphertext
        return _decrypt_windows_dpapi(wrapped_key[len(dpapi_prefix):], logger)

    logger.error('invalid key')
    return None
998
999
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a `key_length`-byte key from `password` and `salt` with PBKDF2-HMAC-SHA1."""
    return pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
1002
1003
b38d4c94
MB
1004def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
1005 for key in keys:
1006 plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
1007 try:
1008 return plaintext.decode()
1009 except UnicodeDecodeError:
1010 pass
1011 logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
1012 return None
982ee69a
MB
1013
1014
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and authenticate `ciphertext` with AES-GCM, then decode it as text.

    Returns the decoded plaintext, or None (after logging a one-time warning)
    when verification raises ValueError or the plaintext is not valid UTF-8.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        decoded = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return decoded
1027
1028
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` with the Windows DPAPI function CryptUnprotectData.

    Returns the decrypted bytes, or None (after a one-time warning) when the
    API call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    # Imported lazily: `ctypes.windll` is only used from inside this function
    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    input_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    output_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(input_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(output_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # Copy the plaintext out before handing the output buffer to LocalFree
    plaintext = ctypes.string_at(output_blob.pbData, output_blob.cbData)
    ctypes.windll.kernel32.LocalFree(output_blob.pbData)
    return plaintext
1061
1062
1063def _config_home():
1064 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
1065
1066
1067def _open_database_copy(database_path, tmpdir):
1068 # cannot open sqlite databases if they are already in use (e.g. by the browser)
1069 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
1070 shutil.copy(database_path, database_copy_path)
1071 conn = sqlite3.connect(database_copy_path)
1072 return conn.cursor()
1073
1074
1075def _get_column_names(cursor, table_name):
86e5f3ed 1076 table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
0f06bcd7 1077 return [row[1].decode() for row in table_info]
982ee69a
MB
1078
1079
cbed249a
SS
1080def _newest(files):
1081 return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)
1082
1083
def _find_files(root, filename, logger):
    """
    Walk `root` and lazily yield the full path of every file named exactly `filename`.

    A progress bar created from `logger` reports the running count of files
    examined. Callers pick among multiple hits (e.g. several browser profiles).
    """
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry == filename:
                    yield os.path.join(directory, entry)
982ee69a
MB
1094
1095
def _merge_cookie_jars(jars):
    """
    Combine several cookie jars into one new `YoutubeDLCookieJar`.

    Cookies are added in iteration order; the merged jar takes the filename of
    the last input jar whose `filename` is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        if source_jar.filename is None:
            continue
        merged.filename = source_jar.filename
    return merged
1104
1105
1106def _is_path(value):
cbed249a 1107 return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
982ee69a
MB
1108
1109
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification and normalise its profile.

    Raises ValueError for a browser outside SUPPORTED_BROWSERS or a keyring
    that is neither None nor in SUPPORTED_KEYRINGS. A profile that expands to
    something containing a path separator is replaced by its expanded form.
    Returns the tuple (browser_name, profile, keyring, container).
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring not in (None, *SUPPORTED_KEYRINGS):
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        expanded_profile = expand_path(profile)
        # Plain profile names are kept as given; only path-like values are expanded
        if _is_path(expanded_profile):
            profile = expanded_profile
    return browser_name, profile, keyring, container
8817a80d
SS
1118
1119
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie"""
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    # Attribute names that configure the preceding cookie instead of starting a new one
    _RESERVED = {
        "expires",
        "path",
        "comment",
        "domain",
        "max-age",
        "secure",
        "httponly",
        "version",
        "samesite",
    }

    # Reserved attributes that may appear without a value
    _FLAGS = {"secure", "httponly"}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """
        Parse a cookie string, skipping malformed entries instead of failing.

        Non-string input is delegated to `SimpleCookie.load`. Reserved
        attributes are applied to the most recent valid cookie; an invalid
        entry discards that association so later attributes are ignored.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        current = None  # morsel that subsequent attributes attach to
        for found in self._COOKIE_PATTERN.finditer(data):
            if found.group('bad'):
                current = None
                continue

            key = found.group('key')
            value = found.group('val')

            # A leading `$` marks an attribute-style key
            is_attribute = key.startswith('$')
            if is_attribute:
                key = key[1:]

            lower_key = key.lower()
            if lower_key in self._RESERVED:
                if current is None:
                    continue

                if value is not None:
                    value, _ = self.value_decode(value)
                elif lower_key in self._FLAGS:
                    value = True
                else:
                    # Valueless non-flag attribute: the current cookie is dropped
                    current = None
                    continue

                current[key] = value
                continue

            if is_attribute or value is None:
                current = None
                continue

            current = self.get(key, http.cookies.Morsel())
            real_value, coded_value = self.value_decode(value)
            current.set(key, real_value, coded_value)
            self[key] = current
b87e01c1 1209
1210
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    Cookie jar reading and writing Netscape formatted cookie files.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix of entries whose line would otherwise look like a comment
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab separated fields in a valid entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """
        @param filename  Path-like object or file-like object to load from/save to (may be None).
                         Path-likes are converted with os.fspath; anything else is stored as given
        """
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Render a condition as the 'TRUE'/'FALSE' token used in cookie files"""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text stream for `file`.

        Path-likes are opened (UTF-8) and closed on exit. Any other object is
        treated as an already open stream and yielded as-is; for writing, it is
        emptied first and is NOT closed on exit.
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # `truncate` does not move the stream position. Without rewinding,
                # a stream positioned past 0 (e.g. after a previous load/save)
                # would get padded up to that position before the new content
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard, ignore_expires):
        """Write one tab separated line per cookie to `f`, honouring the ignore flags"""
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        Falls back to `self.filename` when `filename` is None and raises
        ValueError if neither is set.
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Load cookies from a file.

        Falls back to `self.filename` when `filename` is None and raises
        ValueError if neither is set. Malformed entries are skipped with a
        warning, except that input which looks like JSON raises
        http.cookiejar.LoadError.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly marker so the entry is not mistaken for a comment
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Collect the sanitized lines in memory and let the parent class parse those
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # The trailing space keeps the `[0]` lookup safe for blank lines
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        cookie_req = urllib.request.Request(normalize_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')

    def get_cookies_for_url(self, url):
        """Generate a list of Cookie objects for a given url"""
        # Policy `_now` attribute must be set before calling `_cookies_for_request`
        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
        self._policy._now = self._now = int(time.time())
        return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url))))

    def clear(self, *args, **kwargs):
        """Clear cookies like `CookieJar.clear`, but ignore the KeyError it may raise"""
        with contextlib.suppress(KeyError):
            return super().clear(*args, **kwargs)