]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[ie/Wimbledon] Add extractor (#7551)
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
14f25df2 1import base64
b87e01c1 2import collections
f59f5ef8 3import contextlib
54007a45 4import http.cookiejar
8817a80d 5import http.cookies
b87e01c1 6import io
982ee69a
MB
7import json
8import os
9bd13fe5 9import re
982ee69a 10import shutil
982ee69a
MB
11import struct
12import subprocess
13import sys
14import tempfile
2e4585da 15import time
b87e01c1 16import urllib.request
982ee69a 17from datetime import datetime, timedelta, timezone
f59f5ef8 18from enum import Enum, auto
982ee69a
MB
19from hashlib import pbkdf2_hmac
20
1d3586d0 21from .aes import (
22 aes_cbc_decrypt_bytes,
23 aes_gcm_decrypt_and_verify_bytes,
24 unpad_pkcs7,
25)
9b7a48ab 26from .compat import functools
9b8ee23b 27from .dependencies import (
28 _SECRETSTORAGE_UNAVAILABLE_REASON,
29 secretstorage,
30 sqlite3,
31)
97ec5bc5 32from .minicurses import MultilinePrinter, QuietMultilinePrinter
d2c8aadf 33from .utils import (
34 Popen,
d2c8aadf 35 error_to_str,
b87e01c1 36 escape_url,
d2c8aadf 37 expand_path,
22df97f9 38 is_path_like,
b87e01c1 39 sanitize_url,
40 str_or_none,
d2c8aadf 41 try_call,
b87e01c1 42 write_string,
d2c8aadf 43)
1b392f90 44from .utils._utils import _YDLLogger
982ee69a 45
982ee69a
MB
# Browser names that are routed to the Chromium cookie extractor
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name accepted by extract_cookies_from_browser()
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
48
49
class YDLLogger(_YDLLogger):
    """Logger used while extracting cookies; optionally provides a progress bar."""

    def warning(self, message, only_once=False):  # compat
        """Forward to the parent logger, mapping the legacy `only_once` keyword onto `once`."""
        return super().warning(message, once=only_once)

    class ProgressBar(MultilinePrinter):
        """Printer that redraws line 0 at most once per `_DELAY` seconds."""
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            # Skip the redraw unless more than _DELAY seconds have passed since the last one
            if time.time() - self._timer > self._DELAY:
                self.print_at_line(f'[Cookies] {message}', 0)
                self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        Returns None when no progress bar should be shown.
        """
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
            return
        file = self._ydl._out_files.error
        try:
            if not file.isatty():
                return
        except Exception:
            # Any failure to query the stream means "no progress bar", but
            # KeyboardInterrupt/SystemExit must still propagate
            return
        return self.ProgressBar(file, preserve_output=False)
97ec5bc5 74
75
76def _create_progress_bar(logger):
77 if hasattr(logger, 'progress_bar'):
78 printer = logger.progress_bar()
79 if printer:
80 return printer
81 printer = QuietMultilinePrinter()
82 printer.print = lambda _: None
83 return printer
84
982ee69a
MB
85
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file            None, a path-like, or another object accepted by YoutubeDLCookieJar
    @param browser_specification  None, or the arguments for _parse_browser_specification
    @param ydl                    object handed to YDLLogger for logging
    @returns                      the result of merging every jar that was collected
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
        jars.append(browser_jar)

    if cookie_file is not None:
        from_path = is_path_like(cookie_file)
        if from_path:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # A path that is not readable yields an empty jar; it is not loaded
        if not from_path or os.access(cookie_file, os.R_OK):
            file_jar.load()
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
104
105
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch to the extractor matching `browser_name`.

    `container` is only passed to the firefox extractor and `keyring` only to
    the Chromium-based one.

    @raises ValueError  if the browser name is not recognised
    """
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
115
116
def _extract_firefox_cookies(profile, container, logger):
    """Read cookies from a firefox `cookies.sqlite` database.

    @param profile    None (search the default firefox directory), a path, or a
                      profile name relative to the default firefox directory
    @param container  None (all cookies), 'none' (cookies outside any container),
                      or a container identified by its `name`, its `l10nID`
                      (e.g. "userContextPersonal.label") or the name embedded in
                      that l10nID (e.g. "Personal")
    @param logger     logger providing info/debug/warning
    @returns          a YoutubeDLCookieJar (empty if sqlite3 is unavailable)
    @raises FileNotFoundError  if the database or containers.json cannot be found/read
    @raises ValueError         if the requested container is not listed in containers.json
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        # Read explicitly as UTF-8 so that non-ASCII container names do not
        # depend on the locale's default encoding
        with open(containers_path, encoding='utf-8') as containers:
            identities = json.load(containers).get('identities', [])

        def container_labels(context):
            # Both the full l10nID and the name captured from it are accepted;
            # the full l10nID keeps previously working specifications valid
            return try_call(
                lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group(0, 1)) or ()

        container_id = next((
            context.get('userContextId') for context in identities
            if container in (context.get('name'), *container_labels(context))
        ), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # Single quotes: double-quoted string literals are a non-standard
                # SQLite quirk that can be disabled at build time
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,\'userContextId=\')')
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            # The cursor is None if opening the database copy failed
            if cursor is not None:
                cursor.connection.close()
183
184
185def _firefox_browser_dir():
dec30912 186 if sys.platform in ('cygwin', 'win32'):
19a03940 187 return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
982ee69a
MB
188 elif sys.platform == 'darwin':
189 return os.path.expanduser('~/Library/Application Support/Firefox')
dec30912 190 return os.path.expanduser('~/.mozilla/firefox')
982ee69a
MB
191
192
def _get_chromium_based_browser_settings(browser_name):
    """Return the per-browser settings needed to locate and decrypt cookies.

    @param browser_name  one of the Chromium-based browser names
    @returns             dict with 'browser_dir', 'keyring_name' and 'supports_profiles'
    @raises KeyError     if the browser name is not in the tables below
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    is_mac = platform == 'darwin'

    if platform in ('cygwin', 'win32'):
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        base_dir, relative_dir = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
        }[browser_name]
    elif is_mac:
        base_dir = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]
    else:
        base_dir = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]
    browser_dir = os.path.join(base_dir, relative_dir)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if is_mac else 'Chromium',
        'opera': 'Opera' if is_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if is_mac else 'Chrome',
    }[browser_name]

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        # opera is the only browser here without profile support
        'supports_profiles': browser_name != 'opera',
    }
247
248
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read (and where needed decrypt) cookies from a Chromium-based browser.

    @param browser_name  key understood by _get_chromium_based_browser_settings
    @param profile       None, a path, or a profile name inside the browser directory
    @param keyring       forwarded to get_cookie_decryptor
    @param logger        logger providing info/debug/warning/error
    @returns             a YoutubeDLCookieJar (empty if sqlite3 is unavailable)
    @raises FileNotFoundError  if no 'Cookies' database is found
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)
    supports_profiles = config['supports_profiles']

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # With profile support the given path is treated as a profile inside the browser directory
        config['browser_dir'] = os.path.dirname(profile) if supports_profiles else profile
    elif supports_profiles:
        search_root = os.path.join(config['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = config['browser_dir']

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Text columns are returned as bytes and decoded per cookie
            cursor.connection.text_factory = bytes
            # The column is called either 'is_secure' or 'secure'
            if 'is_secure' in _get_column_names(cursor, 'cookies'):
                secure_column = 'is_secure'
            else:
                secure_column = 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')

            jar = YoutubeDLCookieJar()
            num_failed = num_unencrypted = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                num_rows = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{num_rows: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        num_failed += 1
                        continue
                    if not is_encrypted:
                        num_unencrypted += 1
                    jar.set_cookie(cookie)

            failed_message = f' ({num_failed} could not be decrypted)' if num_failed > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = num_unencrypted
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            # The cursor is None if opening the database copy failed
            if cursor is not None:
                cursor.connection.close()
313
314
97ec5bc5 315def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
0f06bcd7 316 host_key = host_key.decode()
317 name = name.decode()
318 value = value.decode()
319 path = path.decode()
97ec5bc5 320 is_encrypted = not value and encrypted_value
321
322 if is_encrypted:
323 value = decryptor.decrypt(encrypted_value)
324 if value is None:
325 return is_encrypted, None
326
ac668111 327 return is_encrypted, http.cookiejar.Cookie(
97ec5bc5 328 version=0, name=name, value=value, port=None, port_specified=False,
329 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
330 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
331 comment=None, comment_url=None, rest={})
332
333
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class of the platform-specific cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
                - also attempts empty password if decryption fails
            - v11: AES-CBC encrypted with an OS protected key (keyring)
                - also attempts empty password if decryption fails
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Class-level default; each subclass in this file assigns its own dict in __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value; subclasses must override this."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 366
982ee69a 367
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor for the current platform.

    `browser_root` is only used on Windows, `browser_keyring_name` on Mac and
    Linux, and `keyring` on Linux.
    """
    platform = sys.platform
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
982ee69a
MB
374
375
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10/v11 cookies on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        self._v10_key = self.derive_key(b'peanuts')
        self._empty_key = self.derive_key(b'')

    @functools.cached_property
    def _v11_key(self):
        """Key derived from the keyring password, or None if no password was obtained.

        Looked up on first access and cached afterwards.
        """
        password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        if password is None:
            return None
        return self.derive_key(password)

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """

        following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
        with an empty password. The failure detection is not the same as what chromium uses so the
        results won't be perfect

        References:
            - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
                - a bugfix to try an empty password as a fallback
        """
        # The first three bytes carry the version tag
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger)

        self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
        self._cookie_counts['other'] += 1
        return None
425
426
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10 cookies on Mac; any other value is returned unchanged."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        self._cookie_counts = {'v10': 0, 'other': 0}
        # The keyring password is looked up once, when the decryptor is constructed
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = self.derive_key(password) if password is not None else None

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a v10 value (None if no key is available); return other values as-is."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)
457
458
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for Windows: v10 values use AES-GCM, everything else goes through DPAPI."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None if it could not be decrypted."""
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the version tag: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): guards against the DPAPI helper signalling failure with None,
            # so that a single bad cookie is reported as undecryptable instead of raising
            return None if plaintext is None else plaintext.decode()
982ee69a
MB
494
495
496def _extract_safari_cookies(profile, logger):
982ee69a 497 if sys.platform != 'darwin':
86e5f3ed 498 raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a 499
a58182b7
NV
500 if profile:
501 cookies_path = os.path.expanduser(profile)
502 if not os.path.isfile(cookies_path):
503 raise FileNotFoundError('custom safari cookies database not found')
504
505 else:
506 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
982ee69a 507
1f7db853 508 if not os.path.isfile(cookies_path):
a58182b7
NV
509 logger.debug('Trying secondary cookie location')
510 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
511 if not os.path.isfile(cookies_path):
512 raise FileNotFoundError('could not find safari cookies database')
982ee69a
MB
513
514 with open(cookies_path, 'rb') as f:
515 cookies_data = f.read()
516
517 jar = parse_safari_cookies(cookies_data, logger=logger)
86e5f3ed 518 logger.info(f'Extracted {len(jar)} cookies from safari')
982ee69a
MB
519 return jar
520
521
class ParserError(Exception):
    """Raised by DataParser on invalid reads, skips or unexpected data."""
524
525
class DataParser:
    """Sequential reader over a bytes buffer; `cursor` is the current read position."""

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next `num_bytes` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume `len(expected_value)` bytes and fail unless they equal `expected_value`."""
        actual = self.read_bytes(len(expected_value))
        if actual != expected_value:
            raise ParserError(f'unexpected value: {actual} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little endian unless `big_endian`)."""
        (number,) = struct.unpack('>I' if big_endian else '<I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little endian unless `big_endian`)."""
        (number,) = struct.unpack('>d' if big_endian else '<d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read up to and including the next NUL byte; return the decoded text before it."""
        # Reads one byte at a time until the NUL sentinel, which is consumed but not returned
        return b''.join(iter(lambda: self.read_bytes(1), b'\x00')).decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance by `num_bytes`, logging the skipped bytes; zero is a no-op."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped!r}')

    def skip_to(self, offset, description='unknown'):
        """Advance the cursor to the absolute position `offset`."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Advance the cursor to the end of the data."""
        self.skip_to(len(self._data), description)
575
576
577def _mac_absolute_time_to_posix(timestamp):
578 return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
579
580
def _parse_safari_cookies_header(data, logger):
    """Parse the file header.

    @returns  `(page_sizes, body_start)`: the list of page sizes and the offset
              at which the header ends
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    # The page count and the page sizes are big-endian
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
587
588
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of cookie records and add every cookie found to `jar`."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record is parsed from its own slice; this parser then steps over the record
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
607
608
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the record at the start of `data`, add its cookie to `jar`, return the record size.

    A record whose strings cannot be decoded is skipped with a warning; its
    size is still returned.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    # Four consecutive offsets: domain, name, path, value
    string_offsets = [parser.read_uint() for _ in range(4)]
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # The creation date is read (and converted) but not used
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    strings = []
    try:
        for offset in string_offsets:
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
649
650
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari binary cookies file into `jar`
    (a new YoutubeDLCookieJar if none is given) and return the jar.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    # The pages follow the header back to back
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
666
667
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished when choosing a keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    # Returned by _get_linux_desktop_environment when nothing else matches
    OTHER = auto()
    CINNAMON = auto()
    DEEPIN = auto()
    GNOME = auto()
    KDE3 = auto()
    KDE4 = auto()
    KDE5 = auto()
    KDE6 = auto()
    PANTHEON = auto()
    UKUI = auto()
    UNITY = auto()
    XFCE = auto()
    LXQT = auto()
982ee69a
MB
686
687
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends; the member names are exposed through SUPPORTED_KEYRINGS.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()  # KDE4
    KWALLET5 = auto()
    KWALLET6 = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()
f59f5ef8
MB
698
699
# Names of all _LinuxKeyring members (a view over the enum's member mapping)
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
701
702
def _get_linux_desktop_environment(env, logger):
    """
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment

    Detect the desktop environment from a mapping of environment variables.
    The sources are consulted in order, each one only if the previous ones
    did not yield a known environment:
        1. every colon-separated value of XDG_CURRENT_DESKTOP, in priority order
        2. DESKTOP_SESSION
        3. the older GNOME_DESKTOP_SESSION_ID / KDE_FULL_SESSION variables

    @param env     mapping of environment variables (e.g. os.environ)
    @param logger  logger providing info()
    @returns       a _LinuxDesktopEnvironment member (OTHER if nothing matched)
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        # The variable may hold several values (e.g. "ubuntu:GNOME"); try each of them
        for part in map(str.strip, xdg_current_desktop.split(':')):
            if part == 'Unity':
                # gnome-fallback sessions also report Unity here
                if desktop_session is not None and 'gnome-fallback' in desktop_session:
                    return _LinuxDesktopEnvironment.GNOME
                return _LinuxDesktopEnvironment.UNITY
            elif part == 'Deepin':
                return _LinuxDesktopEnvironment.DEEPIN
            elif part == 'GNOME':
                return _LinuxDesktopEnvironment.GNOME
            elif part == 'X-Cinnamon':
                return _LinuxDesktopEnvironment.CINNAMON
            elif part == 'KDE':
                kde_version = env.get('KDE_SESSION_VERSION', None)
                if kde_version == '5':
                    return _LinuxDesktopEnvironment.KDE5
                elif kde_version == '6':
                    return _LinuxDesktopEnvironment.KDE6
                elif kde_version == '4':
                    return _LinuxDesktopEnvironment.KDE4
                else:
                    logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
                    return _LinuxDesktopEnvironment.KDE4
            elif part == 'Pantheon':
                return _LinuxDesktopEnvironment.PANTHEON
            elif part == 'XFCE':
                return _LinuxDesktopEnvironment.XFCE
            elif part == 'UKUI':
                return _LinuxDesktopEnvironment.UKUI
            elif part == 'LXQt':
                return _LinuxDesktopEnvironment.LXQT
        logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')

    # Deliberately not an `elif`: an unrecognised XDG_CURRENT_DESKTOP must not
    # prevent the fallbacks below from being consulted
    if desktop_session is not None:
        if desktop_session == 'deepin':
            return _LinuxDesktopEnvironment.DEEPIN
        elif desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif desktop_session in ('kde4', 'kde-plasma'):
            return _LinuxDesktopEnvironment.KDE4
        elif desktop_session == 'kde':
            if 'KDE_SESSION_VERSION' in env:
                return _LinuxDesktopEnvironment.KDE4
            else:
                return _LinuxDesktopEnvironment.KDE3
        elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
            return _LinuxDesktopEnvironment.XFCE
        elif desktop_session == 'ukui':
            return _LinuxDesktopEnvironment.UKUI
        else:
            logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')

    # Fall back on some older environment variables
    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    if 'KDE_FULL_SESSION' in env:
        if 'KDE_SESSION_VERSION' in env:
            return _LinuxDesktopEnvironment.KDE4
        return _LinuxDesktopEnvironment.KDE3

    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
774
775
def _choose_linux_keyring(logger):
    """
    SelectBackend in [1]

    Pick the keyring backend matching the desktop environment detected from os.environ.

    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
    """
    desktop_environment = _get_linux_desktop_environment(os.environ, logger)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')

    keyring_by_desktop = {
        _LinuxDesktopEnvironment.KDE4: _LinuxKeyring.KWALLET,
        _LinuxDesktopEnvironment.KDE5: _LinuxKeyring.KWALLET5,
        _LinuxDesktopEnvironment.KDE6: _LinuxKeyring.KWALLET6,
        _LinuxDesktopEnvironment.KDE3: _LinuxKeyring.BASICTEXT,
        _LinuxDesktopEnvironment.LXQT: _LinuxKeyring.BASICTEXT,
        _LinuxDesktopEnvironment.OTHER: _LinuxKeyring.BASICTEXT,
    }
    # Every desktop environment not listed above uses the GNOME keyring
    return keyring_by_desktop.get(desktop_environment, _LinuxKeyring.GNOMEKEYRING)
803
804
def _get_kwallet_network_wallet(keyring, logger):
    """
    Return the name of the wallet used to store network passwords.

    The KWallet daemon matching `keyring` is queried through `dbus-send`;
    'kdewallet' is returned whenever the query fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    fallback_wallet = 'kdewallet'
    try:
        if keyring == _LinuxKeyring.KWALLET:
            service_name, wallet_path = 'org.kde.kwalletd', '/modules/kwalletd'
        elif keyring == _LinuxKeyring.KWALLET5:
            service_name, wallet_path = 'org.kde.kwalletd5', '/modules/kwalletd5'
        elif keyring == _LinuxKeyring.KWALLET6:
            service_name, wallet_path = 'org.kde.kwalletd6', '/modules/kwalletd6'
        else:
            raise ValueError(keyring)

        command = [
            'dbus-send', '--session', '--print-reply=literal',
            f'--dest={service_name}',
            wallet_path,
            'org.kde.KWallet.networkWallet',
        ]
        output, _, exit_code = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if exit_code:
            logger.warning('failed to read NetworkWallet')
            return fallback_wallet

        wallet_name = output.strip()
        logger.debug(f'NetworkWallet = "{wallet_name}"')
        return wallet_name
    except Exception as e:
        # best effort: any failure (unknown keyring, missing dbus-send, ...) yields the default
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return fallback_wallet
844
845
b38d4c94
MB
def _get_kwallet_password(browser_keyring_name, keyring, logger):
    """
    Read the browser's "Safe Storage" password from KWallet via the `kwallet-query` tool.

    The entry `<browser_keyring_name> Safe Storage` is looked up in the folder
    `<browser_keyring_name> Keys` of the wallet reported by `_get_kwallet_network_wallet`.

    Returns the password as bytes, or b'' when kwallet-query is missing, fails,
    reports that the entry cannot be read, or raises.
    """
    logger.debug(f'using kwallet-query to obtain password from {keyring.name}')

    if shutil.which('kwallet-query') is None:
        # note the trailing space after "should be": the literals are concatenated
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(keyring, logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this was identified as a bug later and fixed in
                # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
                # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
                return b''
            else:
                logger.debug('password found')
                # only the trailing newline(s) printed by kwallet-query are removed
                return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
887
888
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Return the secret of the `<browser_keyring_name> Safe Storage` item from the
    default secretstorage collection, or b'' if secretstorage is unavailable or
    no item carries that label.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for entry in collection.get_all_items():
            if entry.get_label() == f'{browser_keyring_name} Safe Storage':
                return entry.get_secret()

        # reached only when no entry matched the label
        logger.error('failed to read from keyring')
        return b''
905
906
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Fetch the browser's keyring password on Linux.

    `keyring` is the name of a `_LinuxKeyring` member; when it is falsy the
    keyring is chosen by `_choose_linux_keyring`. Returns None for BASICTEXT.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    kwallet_variants = (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6)
    if keyring in kwallet_variants:
        return _get_kwallet_password(browser_keyring_name, keyring, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {keyring}'
925
926
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Ask the `security` command line tool for the `<browser_keyring_name> Safe Storage`
    password. Returns the password bytes, or None if the command fails or raises.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        output, _, exit_code = Popen.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if not exit_code:
            return output.rstrip(b'\n')
        logger.warning('find-generic-password failed')
        return None
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
943
944
def _get_windows_v10_key(browser_root, logger):
    """
    Locate the most recently used `Local State` file below `browser_root`, read the
    base64 encoded `os_crypt.encrypted_key` from it and decrypt it with DPAPI.

    Returns None (after logging an error) if the file or the key is missing or the
    key does not carry the expected prefix.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        # kOsCryptEncryptedKeyPrefName in [1]
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    raw_key = base64.b64decode(encoded_key)
    # kDPAPIKeyPrefix in [1]
    dpapi_prefix = b'DPAPI'
    if raw_key.startswith(dpapi_prefix):
        return _decrypt_windows_dpapi(raw_key[len(dpapi_prefix):], logger)

    logger.error('invalid key')
    return None
970
971
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of `key_length` bytes from `password` and `salt` with PBKDF2-HMAC-SHA1"""
    return pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
974
975
b38d4c94
MB
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
    """
    Try each key in `keys` in order and return the first AES-CBC decryption that is
    valid UTF-8. Logs a warning (once) and returns None if no key produces such text.
    """
    for candidate_key in keys:
        decrypted = aes_cbc_decrypt_bytes(ciphertext, candidate_key, initialization_vector)
        unpadded = unpad_pkcs7(decrypted)
        # a result that is not valid UTF-8 means this key did not fit; try the next one
        with contextlib.suppress(UnicodeDecodeError):
            return unpadded.decode()

    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
    return None
982ee69a
MB
985
986
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and verify `ciphertext` with AES-GCM and return it decoded as text.

    Returns None (after a once-only warning) if verification raises ValueError
    or the decrypted bytes are not valid UTF-8.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    # decoding is kept in its own try: UnicodeDecodeError is itself a ValueError
    try:
        text = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
999
1000
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling CryptUnprotectData from crypt32 through ctypes.

    Returns the decrypted bytes, or None (after a once-only warning) when the
    call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    # imported inside the function; presumably because `ctypes.windll` only exists on Windows
    import ctypes
    import ctypes.wintypes

    # length + pointer pair used for both the input and the output of CryptUnprotectData
    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    # NOTE(review): create_string_buffer() given bytes appends a NUL terminator, so
    # sizeof(buffer) is len(ciphertext) + 1 - confirm the extra byte is intended
    buffer = ctypes.create_string_buffer(ciphertext)
    blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
    blob_out = DATA_BLOB()
    ret = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(blob_in),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(blob_out)  # pDataOut
    )
    if not ret:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before releasing the output buffer
    result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
    ctypes.windll.kernel32.LocalFree(blob_out.pbData)
    return result
1033
1034
def _config_home():
    """Return $XDG_CONFIG_HOME if it is set, otherwise the expanded `~/.config`"""
    fallback = os.path.expanduser('~/.config')
    return os.environ.get('XDG_CONFIG_HOME', fallback)
1037
1038
def _open_database_copy(database_path, tmpdir):
    """
    Copy the database to `temporary.sqlite` inside `tmpdir` and return a cursor on the copy.

    The connection is not closed here; it stays reachable through the cursor.
    """
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_target = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_target)
    return sqlite3.connect(copy_target).cursor()
1045
1046
def _get_column_names(cursor, table_name):
    """
    Return the column names of `table_name` as a list of str.

    Names are decoded when the connection yields bytes (`text_factory = bytes`)
    and passed through unchanged when it already yields str.
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # index 1 of every `PRAGMA table_info` row holds the column name
    names = (row[1] for row in table_info)
    return [name.decode() if isinstance(name, bytes) else name for name in names]
982ee69a
MB
1050
1051
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk `root` and return the path of the file named `filename` with the newest
    modification time, or None if there is no such file.
    """
    # if there are multiple browser profiles, take the most recently used one
    matches = []
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry != filename:
                    continue
                matches.append(os.path.join(directory, entry))

    if not matches:
        return None
    return max(matches, key=lambda candidate: os.lstat(candidate).st_mtime)
1063
1064
def _merge_cookie_jars(jars):
    """
    Combine the cookies of all `jars` into a new YoutubeDLCookieJar.

    The merged jar takes the filename of the last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source in jars:
        for cookie in source:
            merged.set_cookie(cookie)
        if source.filename is None:
            continue
        merged.filename = source.filename
    return merged
1073
1074
def _is_path(value):
    """Whether `value` contains the path separator of the current platform"""
    separator = os.path.sep
    return separator in value
1077
1078
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification and return it as
    `(browser_name, profile, keyring, container)`.

    A profile whose expanded form contains a path separator is returned expanded.
    Raises ValueError for an unsupported browser or keyring.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring not in (None, *SUPPORTED_KEYRINGS):
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        expanded_profile = expand_path(profile)
        # a plain profile name is kept as given; only paths are replaced by their expansion
        if _is_path(expanded_profile):
            profile = expanded_profile
    return browser_name, profile, keyring, container
8817a80d
SS
1087
1088
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """
    More lenient version of http.cookies.SimpleCookie

    `load` is overridden for str input: instead of giving up on the whole string,
    an entry with an invalid value is dropped and parsing continues with the next entry.
    """
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    # Lowercased keys that are stored as attributes of the preceding cookie
    _RESERVED = {
        "expires",
        "path",
        "comment",
        "domain",
        "max-age",
        "secure",
        "httponly",
        "version",
        "samesite",
    }

    # Reserved keys that are allowed to appear without a value
    _FLAGS = {"secure", "httponly"}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """Load cookies from `data`; anything that is not a str is handled by SimpleCookie.load"""
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # `morsel` is the cookie that following attributes are attached to;
        # None means there is currently no cookie accepting attributes
        morsel = None
        for match in self._COOKIE_PATTERN.finditer(data):
            if match.group('bad'):
                # invalid value: skip this entry and detach from the previous cookie
                morsel = None
                continue

            key, value = match.group('key', 'val')

            # a leading '$' marks the key as an attribute and is stripped from it
            is_attribute = False
            if key.startswith('$'):
                key = key[1:]
                is_attribute = True

            lower_key = key.lower()
            if lower_key in self._RESERVED:
                # a reserved key without a cookie to attach to is ignored
                if morsel is None:
                    continue

                if value is None:
                    # only flags may come without a value; they are stored as True
                    if lower_key not in self._FLAGS:
                        morsel = None
                        continue
                    value = True
                else:
                    value, _ = self.value_decode(value)

                morsel[key] = value

            elif is_attribute:
                # '$'-prefixed key that is not reserved: not stored, and the current cookie is detached
                morsel = None

            elif value is not None:
                # regular `key=value` pair: create the morsel or update the existing one for this key
                morsel = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)
                morsel.set(key, real_value, coded_value)
                self[key] = morsel

            else:
                # non-reserved key without a value: not stored
                morsel = None
b87e01c1 1178
1179
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    Cookie jar reading and writing Netscape formatted cookie files, from a path
    or from an already opened file object.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """`filename` may be None, a path-like object or a file object"""
        # the base class is initialised without a filename; it is stored below,
        # with path-like objects converted through os.fspath
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Render a condition the way the cookie file format spells booleans"""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text file object for `file`.

        A path is opened (UTF-8) and closed again on exit. A file object is yielded
        as is and left open; for writing it is rewound and emptied first.
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() does not move the stream position, so rewind first;
                # otherwise the gap up to the old position is padded with NULs on write
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard, ignore_expires):
        """Write one tab separated line per cookie to the file object `f`"""
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        Falls back to `self.filename` when `filename` is None; raises ValueError if both are None.
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        # (this rewrites `expires` on the cookies held by the jar, not only in the output)
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Load cookies from a file.

        Falls back to `self.filename` when `filename` is None; raises ValueError if both are None.
        Malformed entries are skipped with a warning, except that a line which looks
        like JSON raises http.cookiejar.LoadError.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # strip the HttpOnly marker so the entry is parsed like a regular one
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # the validated lines are collected in memory and parsed by the base class from there
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # the appended space keeps the [0] lookup valid for an empty line
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See  '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')

    def get_cookies_for_url(self, url):
        """Generate a list of Cookie objects for a given url"""
        # Policy `_now` attribute must be set before calling `_cookies_for_request`
        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
        self._policy._now = self._now = int(time.time())
        return self._cookies_for_request(urllib.request.Request(escape_url(sanitize_url(url))))

    def clear(self, *args, **kwargs):
        """Like the base class `clear`, but a KeyError raised by it is suppressed"""
        with contextlib.suppress(KeyError):
            return super().clear(*args, **kwargs)