]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
Add option `--netrc-cmd` (#6682)
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
14f25df2 1import base64
b87e01c1 2import collections
f59f5ef8 3import contextlib
54007a45 4import http.cookiejar
8817a80d 5import http.cookies
b87e01c1 6import io
982ee69a
MB
7import json
8import os
9bd13fe5 9import re
982ee69a 10import shutil
982ee69a
MB
11import struct
12import subprocess
13import sys
14import tempfile
2e4585da 15import time
b87e01c1 16import urllib.request
982ee69a 17from datetime import datetime, timedelta, timezone
f59f5ef8 18from enum import Enum, auto
982ee69a
MB
19from hashlib import pbkdf2_hmac
20
1d3586d0 21from .aes import (
22 aes_cbc_decrypt_bytes,
23 aes_gcm_decrypt_and_verify_bytes,
24 unpad_pkcs7,
25)
9b7a48ab 26from .compat import functools
9b8ee23b 27from .dependencies import (
28 _SECRETSTORAGE_UNAVAILABLE_REASON,
29 secretstorage,
30 sqlite3,
31)
97ec5bc5 32from .minicurses import MultilinePrinter, QuietMultilinePrinter
d2c8aadf 33from .utils import (
34 Popen,
d2c8aadf 35 error_to_str,
b87e01c1 36 escape_url,
d2c8aadf 37 expand_path,
22df97f9 38 is_path_like,
b87e01c1 39 sanitize_url,
40 str_or_none,
d2c8aadf 41 try_call,
b87e01c1 42 write_string,
d2c8aadf 43)
982ee69a 44
982ee69a
MB
45CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
46SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
47
48
49class YDLLogger:
50 def __init__(self, ydl=None):
51 self._ydl = ydl
52
53 def debug(self, message):
54 if self._ydl:
55 self._ydl.write_debug(message)
56
57 def info(self, message):
58 if self._ydl:
59 self._ydl.to_screen(f'[Cookies] {message}')
60
61 def warning(self, message, only_once=False):
62 if self._ydl:
63 self._ydl.report_warning(message, only_once)
64
65 def error(self, message):
66 if self._ydl:
67 self._ydl.report_error(message)
68
2e4585da 69 class ProgressBar(MultilinePrinter):
70 _DELAY, _timer = 0.1, 0
71
72 def print(self, message):
73 if time.time() - self._timer > self._DELAY:
74 self.print_at_line(f'[Cookies] {message}', 0)
75 self._timer = time.time()
76
97ec5bc5 77 def progress_bar(self):
78 """Return a context manager with a print method. (Optional)"""
79 # Do not print to files/pipes, loggers, or when --no-progress is used
80 if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
81 return
591bb9d3 82 file = self._ydl._out_files.error
97ec5bc5 83 try:
84 if not file.isatty():
85 return
86 except BaseException:
87 return
2e4585da 88 return self.ProgressBar(file, preserve_output=False)
97ec5bc5 89
90
91def _create_progress_bar(logger):
92 if hasattr(logger, 'progress_bar'):
93 printer = logger.progress_bar()
94 if printer:
95 return printer
96 printer = QuietMultilinePrinter()
97 printer.print = lambda _: None
98 return printer
99
982ee69a
MB
100
101def load_cookies(cookie_file, browser_specification, ydl):
102 cookie_jars = []
103 if browser_specification is not None:
9bd13fe5 104 browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
105 cookie_jars.append(
106 extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))
982ee69a
MB
107
108 if cookie_file is not None:
22df97f9 109 is_filename = is_path_like(cookie_file)
d76fa1f3 110 if is_filename:
111 cookie_file = expand_path(cookie_file)
112
982ee69a 113 jar = YoutubeDLCookieJar(cookie_file)
d76fa1f3 114 if not is_filename or os.access(cookie_file, os.R_OK):
982ee69a
MB
115 jar.load(ignore_discard=True, ignore_expires=True)
116 cookie_jars.append(jar)
117
118 return _merge_cookie_jars(cookie_jars)
119
120
9bd13fe5 121def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
982ee69a 122 if browser_name == 'firefox':
9bd13fe5 123 return _extract_firefox_cookies(profile, container, logger)
982ee69a
MB
124 elif browser_name == 'safari':
125 return _extract_safari_cookies(profile, logger)
126 elif browser_name in CHROMIUM_BASED_BROWSERS:
f59f5ef8 127 return _extract_chrome_cookies(browser_name, profile, keyring, logger)
982ee69a 128 else:
86e5f3ed 129 raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
130
131
9bd13fe5 132def _extract_firefox_cookies(profile, container, logger):
982ee69a 133 logger.info('Extracting cookies from firefox')
9b8ee23b 134 if not sqlite3:
767b02a9
MB
135 logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
136 'Please use a python interpreter compiled with sqlite3 support')
137 return YoutubeDLCookieJar()
982ee69a
MB
138
139 if profile is None:
140 search_root = _firefox_browser_dir()
141 elif _is_path(profile):
142 search_root = profile
143 else:
144 search_root = os.path.join(_firefox_browser_dir(), profile)
145
825d3ce3 146 cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
147 if cookie_database_path is None:
148 raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
149 logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
150
9bd13fe5 151 container_id = None
825d3ce3 152 if container not in (None, 'none'):
153 containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
9bd13fe5 154 if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
155 raise FileNotFoundError(f'could not read containers.json in {search_root}')
d2c8aadf 156 with open(containers_path) as containers:
9bd13fe5 157 identities = json.load(containers).get('identities', [])
158 container_id = next((context.get('userContextId') for context in identities if container in (
159 context.get('name'),
160 try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
161 )), None)
162 if not isinstance(container_id, int):
163 raise ValueError(f'could not find firefox container "{container}" in containers.json')
164
0930b11f 165 with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
982ee69a
MB
166 cursor = None
167 try:
168 cursor = _open_database_copy(cookie_database_path, tmpdir)
9bd13fe5 169 if isinstance(container_id, int):
9bd13fe5 170 logger.debug(
171 f'Only loading cookies from firefox container "{container}", ID {container_id}')
9bd13fe5 172 cursor.execute(
825d3ce3 173 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
174 (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
175 elif container == 'none':
176 logger.debug('Only loading cookies not belonging to any container')
177 cursor.execute(
178 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")')
179 else:
9bd13fe5 180 cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
982ee69a 181 jar = YoutubeDLCookieJar()
97ec5bc5 182 with _create_progress_bar(logger) as progress_bar:
183 table = cursor.fetchall()
184 total_cookie_count = len(table)
185 for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
186 progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
ac668111 187 cookie = http.cookiejar.Cookie(
97ec5bc5 188 version=0, name=name, value=value, port=None, port_specified=False,
189 domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
190 path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
191 comment=None, comment_url=None, rest={})
192 jar.set_cookie(cookie)
86e5f3ed 193 logger.info(f'Extracted {len(jar)} cookies from firefox')
982ee69a
MB
194 return jar
195 finally:
196 if cursor is not None:
197 cursor.connection.close()
198
199
200def _firefox_browser_dir():
dec30912 201 if sys.platform in ('cygwin', 'win32'):
19a03940 202 return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
982ee69a
MB
203 elif sys.platform == 'darwin':
204 return os.path.expanduser('~/Library/Application Support/Firefox')
dec30912 205 return os.path.expanduser('~/.mozilla/firefox')
982ee69a
MB
206
207
208def _get_chromium_based_browser_settings(browser_name):
209 # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
dec30912 210 if sys.platform in ('cygwin', 'win32'):
982ee69a
MB
211 appdata_local = os.path.expandvars('%LOCALAPPDATA%')
212 appdata_roaming = os.path.expandvars('%APPDATA%')
213 browser_dir = {
19a03940 214 'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
215 'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'),
216 'chromium': os.path.join(appdata_local, R'Chromium\User Data'),
217 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
218 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
219 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
982ee69a
MB
220 }[browser_name]
221
222 elif sys.platform == 'darwin':
223 appdata = os.path.expanduser('~/Library/Application Support')
224 browser_dir = {
225 'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'),
226 'chrome': os.path.join(appdata, 'Google/Chrome'),
227 'chromium': os.path.join(appdata, 'Chromium'),
228 'edge': os.path.join(appdata, 'Microsoft Edge'),
229 'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
230 'vivaldi': os.path.join(appdata, 'Vivaldi'),
231 }[browser_name]
232
233 else:
dec30912
CMT
234 config = _config_home()
235 browser_dir = {
236 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
237 'chrome': os.path.join(config, 'google-chrome'),
238 'chromium': os.path.join(config, 'chromium'),
239 'edge': os.path.join(config, 'microsoft-edge'),
240 'opera': os.path.join(config, 'opera'),
241 'vivaldi': os.path.join(config, 'vivaldi'),
242 }[browser_name]
982ee69a
MB
243
244 # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
245 # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
246 keyring_name = {
247 'brave': 'Brave',
248 'chrome': 'Chrome',
249 'chromium': 'Chromium',
29b208f6 250 'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
982ee69a
MB
251 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
252 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
253 }[browser_name]
254
255 browsers_without_profiles = {'opera'}
256
257 return {
258 'browser_dir': browser_dir,
259 'keyring_name': keyring_name,
260 'supports_profiles': browser_name not in browsers_without_profiles
261 }
262
263
f59f5ef8 264def _extract_chrome_cookies(browser_name, profile, keyring, logger):
86e5f3ed 265 logger.info(f'Extracting cookies from {browser_name}')
767b02a9 266
9b8ee23b 267 if not sqlite3:
19a03940 268 logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
269 'Please use a python interpreter compiled with sqlite3 support')
767b02a9
MB
270 return YoutubeDLCookieJar()
271
982ee69a
MB
272 config = _get_chromium_based_browser_settings(browser_name)
273
274 if profile is None:
275 search_root = config['browser_dir']
276 elif _is_path(profile):
277 search_root = profile
278 config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
279 else:
280 if config['supports_profiles']:
281 search_root = os.path.join(config['browser_dir'], profile)
282 else:
86e5f3ed 283 logger.error(f'{browser_name} does not support profiles')
982ee69a
MB
284 search_root = config['browser_dir']
285
97ec5bc5 286 cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
982ee69a 287 if cookie_database_path is None:
86e5f3ed 288 raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
289 logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
982ee69a 290
f59f5ef8 291 decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
982ee69a 292
0930b11f 293 with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
982ee69a
MB
294 cursor = None
295 try:
296 cursor = _open_database_copy(cookie_database_path, tmpdir)
297 cursor.connection.text_factory = bytes
298 column_names = _get_column_names(cursor, 'cookies')
299 secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
19a03940 300 cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
982ee69a
MB
301 jar = YoutubeDLCookieJar()
302 failed_cookies = 0
f59f5ef8 303 unencrypted_cookies = 0
97ec5bc5 304 with _create_progress_bar(logger) as progress_bar:
305 table = cursor.fetchall()
306 total_cookie_count = len(table)
307 for i, line in enumerate(table):
308 progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
309 is_encrypted, cookie = _process_chrome_cookie(decryptor, *line)
310 if not cookie:
982ee69a
MB
311 failed_cookies += 1
312 continue
97ec5bc5 313 elif not is_encrypted:
314 unencrypted_cookies += 1
315 jar.set_cookie(cookie)
982ee69a 316 if failed_cookies > 0:
86e5f3ed 317 failed_message = f' ({failed_cookies} could not be decrypted)'
982ee69a
MB
318 else:
319 failed_message = ''
86e5f3ed 320 logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
24146491 321 counts = decryptor._cookie_counts.copy()
f59f5ef8 322 counts['unencrypted'] = unencrypted_cookies
86e5f3ed 323 logger.debug(f'cookie version breakdown: {counts}')
982ee69a
MB
324 return jar
325 finally:
326 if cursor is not None:
327 cursor.connection.close()
328
329
97ec5bc5 330def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
0f06bcd7 331 host_key = host_key.decode()
332 name = name.decode()
333 value = value.decode()
334 path = path.decode()
97ec5bc5 335 is_encrypted = not value and encrypted_value
336
337 if is_encrypted:
338 value = decryptor.decrypt(encrypted_value)
339 if value is None:
340 return is_encrypted, None
341
ac668111 342 return is_encrypted, http.cookiejar.Cookie(
97ec5bc5 343 version=0, name=name, value=value, port=None, port_specified=False,
344 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
345 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
346 comment=None, comment_url=None, rest={})
347
348
982ee69a
MB
349class ChromeCookieDecryptor:
350 """
351 Overview:
352
353 Linux:
354 - cookies are either v10 or v11
355 - v10: AES-CBC encrypted with a fixed key
b38d4c94 356 - also attempts empty password if decryption fails
982ee69a 357 - v11: AES-CBC encrypted with an OS protected key (keyring)
b38d4c94 358 - also attempts empty password if decryption fails
982ee69a
MB
359 - v11 keys can be stored in various places depending on the activate desktop environment [2]
360
361 Mac:
362 - cookies are either v10 or not v10
363 - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
364 - not v10: 'old data' stored as plaintext
365
366 Windows:
367 - cookies are either v10 or not v10
368 - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
369 - not v10: encrypted with DPAPI
370
371 Sources:
372 - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
b38d4c94 373 - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
982ee69a
MB
374 - KeyStorageLinux::CreateService
375 """
376
0fa7d2c8 377 _cookie_counts = {}
982ee69a 378
0fa7d2c8 379 def decrypt(self, encrypted_value):
19a03940 380 raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 381
982ee69a 382
f59f5ef8 383def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
56ba69e4 384 if sys.platform == 'darwin':
982ee69a 385 return MacChromeCookieDecryptor(browser_keyring_name, logger)
56ba69e4 386 elif sys.platform in ('win32', 'cygwin'):
982ee69a 387 return WindowsChromeCookieDecryptor(browser_root, logger)
56ba69e4 388 return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
982ee69a
MB
389
390
391class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
f59f5ef8 392 def __init__(self, browser_keyring_name, logger, *, keyring=None):
982ee69a
MB
393 self._logger = logger
394 self._v10_key = self.derive_key(b'peanuts')
b38d4c94 395 self._empty_key = self.derive_key(b'')
f59f5ef8 396 self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
9b7a48ab
SS
397 self._browser_keyring_name = browser_keyring_name
398 self._keyring = keyring
399
400 @functools.cached_property
401 def _v11_key(self):
402 password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
403 return None if password is None else self.derive_key(password)
982ee69a
MB
404
405 @staticmethod
406 def derive_key(password):
407 # values from
b38d4c94 408 # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
982ee69a
MB
409 return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)
410
411 def decrypt(self, encrypted_value):
b38d4c94
MB
412 """
413
414 following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
415 with an empty password. The failure detection is not the same as what chromium uses so the
416 results won't be perfect
417
418 References:
419 - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
420 - a bugfix to try an empty password as a fallback
421 """
982ee69a
MB
422 version = encrypted_value[:3]
423 ciphertext = encrypted_value[3:]
424
425 if version == b'v10':
f59f5ef8 426 self._cookie_counts['v10'] += 1
b38d4c94 427 return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)
982ee69a
MB
428
429 elif version == b'v11':
f59f5ef8 430 self._cookie_counts['v11'] += 1
982ee69a 431 if self._v11_key is None:
f59f5ef8 432 self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
982ee69a 433 return None
b38d4c94 434 return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger)
982ee69a
MB
435
436 else:
b38d4c94 437 self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
f59f5ef8 438 self._cookie_counts['other'] += 1
982ee69a
MB
439 return None
440
441
442class MacChromeCookieDecryptor(ChromeCookieDecryptor):
443 def __init__(self, browser_keyring_name, logger):
444 self._logger = logger
f440b14f 445 password = _get_mac_keyring_password(browser_keyring_name, logger)
982ee69a 446 self._v10_key = None if password is None else self.derive_key(password)
f59f5ef8 447 self._cookie_counts = {'v10': 0, 'other': 0}
982ee69a
MB
448
449 @staticmethod
450 def derive_key(password):
451 # values from
b38d4c94 452 # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
982ee69a
MB
453 return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)
454
455 def decrypt(self, encrypted_value):
456 version = encrypted_value[:3]
457 ciphertext = encrypted_value[3:]
458
459 if version == b'v10':
f59f5ef8 460 self._cookie_counts['v10'] += 1
982ee69a
MB
461 if self._v10_key is None:
462 self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
463 return None
464
b38d4c94 465 return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)
982ee69a
MB
466
467 else:
f59f5ef8 468 self._cookie_counts['other'] += 1
982ee69a 469 # other prefixes are considered 'old data' which were stored as plaintext
b38d4c94 470 # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
982ee69a
MB
471 return encrypted_value
472
473
474class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
475 def __init__(self, browser_root, logger):
476 self._logger = logger
477 self._v10_key = _get_windows_v10_key(browser_root, logger)
f59f5ef8
MB
478 self._cookie_counts = {'v10': 0, 'other': 0}
479
982ee69a
MB
480 def decrypt(self, encrypted_value):
481 version = encrypted_value[:3]
482 ciphertext = encrypted_value[3:]
483
484 if version == b'v10':
f59f5ef8 485 self._cookie_counts['v10'] += 1
982ee69a
MB
486 if self._v10_key is None:
487 self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
488 return None
982ee69a 489
b38d4c94 490 # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
982ee69a
MB
491 # kNonceLength
492 nonce_length = 96 // 8
493 # boringssl
494 # EVP_AEAD_AES_GCM_TAG_LEN
495 authentication_tag_length = 16
496
497 raw_ciphertext = ciphertext
498 nonce = raw_ciphertext[:nonce_length]
499 ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
500 authentication_tag = raw_ciphertext[-authentication_tag_length:]
501
502 return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
503
504 else:
f59f5ef8 505 self._cookie_counts['other'] += 1
982ee69a 506 # any other prefix means the data is DPAPI encrypted
b38d4c94 507 # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
0f06bcd7 508 return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()
982ee69a
MB
509
510
511def _extract_safari_cookies(profile, logger):
982ee69a 512 if sys.platform != 'darwin':
86e5f3ed 513 raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a 514
a58182b7
NV
515 if profile:
516 cookies_path = os.path.expanduser(profile)
517 if not os.path.isfile(cookies_path):
518 raise FileNotFoundError('custom safari cookies database not found')
519
520 else:
521 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
982ee69a 522
1f7db853 523 if not os.path.isfile(cookies_path):
a58182b7
NV
524 logger.debug('Trying secondary cookie location')
525 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
526 if not os.path.isfile(cookies_path):
527 raise FileNotFoundError('could not find safari cookies database')
982ee69a
MB
528
529 with open(cookies_path, 'rb') as f:
530 cookies_data = f.read()
531
532 jar = parse_safari_cookies(cookies_data, logger=logger)
86e5f3ed 533 logger.info(f'Extracted {len(jar)} cookies from safari')
982ee69a
MB
534 return jar
535
536
537class ParserError(Exception):
538 pass
539
540
541class DataParser:
542 def __init__(self, data, logger):
543 self._data = data
544 self.cursor = 0
545 self._logger = logger
546
547 def read_bytes(self, num_bytes):
548 if num_bytes < 0:
86e5f3ed 549 raise ParserError(f'invalid read of {num_bytes} bytes')
982ee69a
MB
550 end = self.cursor + num_bytes
551 if end > len(self._data):
552 raise ParserError('reached end of input')
553 data = self._data[self.cursor:end]
554 self.cursor = end
555 return data
556
557 def expect_bytes(self, expected_value, message):
558 value = self.read_bytes(len(expected_value))
559 if value != expected_value:
86e5f3ed 560 raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')
982ee69a
MB
561
562 def read_uint(self, big_endian=False):
563 data_format = '>I' if big_endian else '<I'
564 return struct.unpack(data_format, self.read_bytes(4))[0]
565
566 def read_double(self, big_endian=False):
567 data_format = '>d' if big_endian else '<d'
568 return struct.unpack(data_format, self.read_bytes(8))[0]
569
570 def read_cstring(self):
571 buffer = []
572 while True:
573 c = self.read_bytes(1)
574 if c == b'\x00':
0f06bcd7 575 return b''.join(buffer).decode()
982ee69a
MB
576 else:
577 buffer.append(c)
578
579 def skip(self, num_bytes, description='unknown'):
580 if num_bytes > 0:
19a03940 581 self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
982ee69a 582 elif num_bytes < 0:
86e5f3ed 583 raise ParserError(f'invalid skip of {num_bytes} bytes')
982ee69a
MB
584
585 def skip_to(self, offset, description='unknown'):
586 self.skip(offset - self.cursor, description)
587
588 def skip_to_end(self, description='unknown'):
589 self.skip_to(len(self._data), description)
590
591
592def _mac_absolute_time_to_posix(timestamp):
593 return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
594
595
596def _parse_safari_cookies_header(data, logger):
597 p = DataParser(data, logger)
598 p.expect_bytes(b'cook', 'database signature')
599 number_of_pages = p.read_uint(big_endian=True)
600 page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)]
601 return page_sizes, p.cursor
602
603
604def _parse_safari_cookies_page(data, jar, logger):
605 p = DataParser(data, logger)
606 p.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
607 number_of_cookies = p.read_uint()
608 record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
609 if number_of_cookies == 0:
86e5f3ed 610 logger.debug(f'a cookies page of size {len(data)} has no cookies')
982ee69a
MB
611 return
612
613 p.skip_to(record_offsets[0], 'unknown page header field')
614
97ec5bc5 615 with _create_progress_bar(logger) as progress_bar:
616 for i, record_offset in enumerate(record_offsets):
617 progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
618 p.skip_to(record_offset, 'space between records')
619 record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
620 p.read_bytes(record_length)
982ee69a
MB
621 p.skip_to_end('space in between pages')
622
623
624def _parse_safari_cookies_record(data, jar, logger):
625 p = DataParser(data, logger)
626 record_size = p.read_uint()
627 p.skip(4, 'unknown record field 1')
628 flags = p.read_uint()
629 is_secure = bool(flags & 0x0001)
630 p.skip(4, 'unknown record field 2')
631 domain_offset = p.read_uint()
632 name_offset = p.read_uint()
633 path_offset = p.read_uint()
634 value_offset = p.read_uint()
635 p.skip(8, 'unknown record field 3')
636 expiration_date = _mac_absolute_time_to_posix(p.read_double())
637 _creation_date = _mac_absolute_time_to_posix(p.read_double()) # noqa: F841
638
639 try:
640 p.skip_to(domain_offset)
641 domain = p.read_cstring()
642
643 p.skip_to(name_offset)
644 name = p.read_cstring()
645
646 p.skip_to(path_offset)
647 path = p.read_cstring()
648
649 p.skip_to(value_offset)
650 value = p.read_cstring()
651 except UnicodeDecodeError:
f440b14f 652 logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
982ee69a
MB
653 return record_size
654
655 p.skip_to(record_size, 'space at the end of the record')
656
ac668111 657 cookie = http.cookiejar.Cookie(
982ee69a
MB
658 version=0, name=name, value=value, port=None, port_specified=False,
659 domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
660 path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
661 comment=None, comment_url=None, rest={})
662 jar.set_cookie(cookie)
663 return record_size
664
665
666def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
667 """
668 References:
669 - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
670 - this data appears to be out of date but the important parts of the database structure is the same
671 - there are a few bytes here and there which are skipped during parsing
672 """
673 if jar is None:
674 jar = YoutubeDLCookieJar()
675 page_sizes, body_start = _parse_safari_cookies_header(data, logger)
676 p = DataParser(data[body_start:], logger)
677 for page_size in page_sizes:
678 _parse_safari_cookies_page(p.read_bytes(page_size), jar, logger)
679 p.skip_to_end('footer')
680 return jar
681
682
f59f5ef8
MB
683class _LinuxDesktopEnvironment(Enum):
684 """
685 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
686 DesktopEnvironment
687 """
688 OTHER = auto()
689 CINNAMON = auto()
b38d4c94 690 DEEPIN = auto()
f59f5ef8 691 GNOME = auto()
b38d4c94
MB
692 KDE3 = auto()
693 KDE4 = auto()
694 KDE5 = auto()
695 KDE6 = auto()
f59f5ef8 696 PANTHEON = auto()
b38d4c94 697 UKUI = auto()
f59f5ef8
MB
698 UNITY = auto()
699 XFCE = auto()
b38d4c94 700 LXQT = auto()
982ee69a
MB
701
702
f59f5ef8
MB
703class _LinuxKeyring(Enum):
704 """
b38d4c94 705 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
f59f5ef8
MB
706 SelectedLinuxBackend
707 """
2e023649 708 KWALLET = auto() # KDE4
b38d4c94
MB
709 KWALLET5 = auto()
710 KWALLET6 = auto()
2e023649 711 GNOMEKEYRING = auto()
712 BASICTEXT = auto()
f59f5ef8
MB
713
714
715SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
716
717
b38d4c94 718def _get_linux_desktop_environment(env, logger):
f59f5ef8
MB
719 """
720 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
721 GetDesktopEnvironment
722 """
723 xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
724 desktop_session = env.get('DESKTOP_SESSION', None)
725 if xdg_current_desktop is not None:
726 xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
727
728 if xdg_current_desktop == 'Unity':
729 if desktop_session is not None and 'gnome-fallback' in desktop_session:
730 return _LinuxDesktopEnvironment.GNOME
731 else:
732 return _LinuxDesktopEnvironment.UNITY
b38d4c94
MB
733 elif xdg_current_desktop == 'Deepin':
734 return _LinuxDesktopEnvironment.DEEPIN
f59f5ef8
MB
735 elif xdg_current_desktop == 'GNOME':
736 return _LinuxDesktopEnvironment.GNOME
737 elif xdg_current_desktop == 'X-Cinnamon':
738 return _LinuxDesktopEnvironment.CINNAMON
739 elif xdg_current_desktop == 'KDE':
b38d4c94
MB
740 kde_version = env.get('KDE_SESSION_VERSION', None)
741 if kde_version == '5':
742 return _LinuxDesktopEnvironment.KDE5
743 elif kde_version == '6':
744 return _LinuxDesktopEnvironment.KDE6
745 elif kde_version == '4':
746 return _LinuxDesktopEnvironment.KDE4
747 else:
748 logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
749 return _LinuxDesktopEnvironment.KDE4
f59f5ef8
MB
750 elif xdg_current_desktop == 'Pantheon':
751 return _LinuxDesktopEnvironment.PANTHEON
752 elif xdg_current_desktop == 'XFCE':
753 return _LinuxDesktopEnvironment.XFCE
b38d4c94
MB
754 elif xdg_current_desktop == 'UKUI':
755 return _LinuxDesktopEnvironment.UKUI
756 elif xdg_current_desktop == 'LXQt':
757 return _LinuxDesktopEnvironment.LXQT
758 else:
759 logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
760
f59f5ef8 761 elif desktop_session is not None:
b38d4c94
MB
762 if desktop_session == 'deepin':
763 return _LinuxDesktopEnvironment.DEEPIN
764 elif desktop_session in ('mate', 'gnome'):
f59f5ef8 765 return _LinuxDesktopEnvironment.GNOME
b38d4c94
MB
766 elif desktop_session in ('kde4', 'kde-plasma'):
767 return _LinuxDesktopEnvironment.KDE4
768 elif desktop_session == 'kde':
769 if 'KDE_SESSION_VERSION' in env:
770 return _LinuxDesktopEnvironment.KDE4
771 else:
772 return _LinuxDesktopEnvironment.KDE3
773 elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
f59f5ef8 774 return _LinuxDesktopEnvironment.XFCE
b38d4c94
MB
775 elif desktop_session == 'ukui':
776 return _LinuxDesktopEnvironment.UKUI
777 else:
778 logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
779
f59f5ef8
MB
780 else:
781 if 'GNOME_DESKTOP_SESSION_ID' in env:
782 return _LinuxDesktopEnvironment.GNOME
783 elif 'KDE_FULL_SESSION' in env:
b38d4c94
MB
784 if 'KDE_SESSION_VERSION' in env:
785 return _LinuxDesktopEnvironment.KDE4
786 else:
787 return _LinuxDesktopEnvironment.KDE3
fa8fd951 788 return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
789
790
791def _choose_linux_keyring(logger):
792 """
b38d4c94
MB
793 SelectBackend in [1]
794
795 There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
796 `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
797 does not appear to be called anywhere other than in tests, so the user would have to create this file manually
798 and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.
799
800 References:
801 - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
f59f5ef8 802 """
b38d4c94 803 desktop_environment = _get_linux_desktop_environment(os.environ, logger)
86e5f3ed 804 logger.debug(f'detected desktop environment: {desktop_environment.name}')
b38d4c94 805 if desktop_environment == _LinuxDesktopEnvironment.KDE4:
2e023649 806 linux_keyring = _LinuxKeyring.KWALLET
b38d4c94
MB
807 elif desktop_environment == _LinuxDesktopEnvironment.KDE5:
808 linux_keyring = _LinuxKeyring.KWALLET5
809 elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
810 linux_keyring = _LinuxKeyring.KWALLET6
811 elif desktop_environment in (
812 _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
813 ):
2e023649 814 linux_keyring = _LinuxKeyring.BASICTEXT
982ee69a 815 else:
2e023649 816 linux_keyring = _LinuxKeyring.GNOMEKEYRING
f59f5ef8
MB
817 return linux_keyring
818
819
b38d4c94 820def _get_kwallet_network_wallet(keyring, logger):
f59f5ef8
MB
821 """ The name of the wallet used to store network passwords.
822
b38d4c94 823 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
f59f5ef8
MB
824 KWalletDBus::NetworkWallet
825 which does a dbus call to the following function:
826 https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
827 Wallet::NetworkWallet
828 """
829 default_wallet = 'kdewallet'
830 try:
2e023649 831 if keyring == _LinuxKeyring.KWALLET:
b38d4c94
MB
832 service_name = 'org.kde.kwalletd'
833 wallet_path = '/modules/kwalletd'
834 elif keyring == _LinuxKeyring.KWALLET5:
835 service_name = 'org.kde.kwalletd5'
836 wallet_path = '/modules/kwalletd5'
837 elif keyring == _LinuxKeyring.KWALLET6:
838 service_name = 'org.kde.kwalletd6'
839 wallet_path = '/modules/kwalletd6'
840 else:
841 raise ValueError(keyring)
842
f0c9fb96 843 stdout, _, returncode = Popen.run([
f59f5ef8 844 'dbus-send', '--session', '--print-reply=literal',
b38d4c94
MB
845 f'--dest={service_name}',
846 wallet_path,
f59f5ef8 847 'org.kde.KWallet.networkWallet'
f0c9fb96 848 ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
f59f5ef8 849
f0c9fb96 850 if returncode:
f59f5ef8
MB
851 logger.warning('failed to read NetworkWallet')
852 return default_wallet
853 else:
f0c9fb96 854 logger.debug(f'NetworkWallet = "{stdout.strip()}"')
855 return stdout.strip()
a44ca5a4 856 except Exception as e:
86e5f3ed 857 logger.warning(f'exception while obtaining NetworkWallet: {e}')
f59f5ef8
MB
858 return default_wallet
859
860
b38d4c94
MB
861def _get_kwallet_password(browser_keyring_name, keyring, logger):
862 logger.debug(f'using kwallet-query to obtain password from {keyring.name}')
f59f5ef8
MB
863
864 if shutil.which('kwallet-query') is None:
865 logger.error('kwallet-query command not found. KWallet and kwallet-query '
866 'must be installed to read from KWallet. kwallet-query should be'
867 'included in the kwallet package for your distribution')
868 return b''
869
b38d4c94 870 network_wallet = _get_kwallet_network_wallet(keyring, logger)
f59f5ef8
MB
871
872 try:
f0c9fb96 873 stdout, _, returncode = Popen.run([
f59f5ef8 874 'kwallet-query',
86e5f3ed 875 '--read-password', f'{browser_keyring_name} Safe Storage',
876 '--folder', f'{browser_keyring_name} Keys',
f59f5ef8
MB
877 network_wallet
878 ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
879
f0c9fb96 880 if returncode:
881 logger.error(f'kwallet-query failed with return code {returncode}. '
882 'Please consult the kwallet-query man page for details')
f59f5ef8
MB
883 return b''
884 else:
885 if stdout.lower().startswith(b'failed to read'):
886 logger.debug('failed to read password from kwallet. Using empty string instead')
887 # this sometimes occurs in KDE because chrome does not check hasEntry and instead
888 # just tries to read the value (which kwallet returns "") whereas kwallet-query
889 # checks hasEntry. To verify this:
890 # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
891 # while starting chrome.
b38d4c94
MB
892 # this was identified as a bug later and fixed in
893 # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
894 # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
f59f5ef8
MB
895 return b''
896 else:
897 logger.debug('password found')
f0c9fb96 898 return stdout.rstrip(b'\n')
a44ca5a4 899 except Exception as e:
900 logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
f59f5ef8
MB
901 return b''
902
903
904def _get_gnome_keyring_password(browser_keyring_name, logger):
9b8ee23b 905 if not secretstorage:
906 logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
f59f5ef8
MB
907 return b''
908 # the Gnome keyring does not seem to organise keys in the same way as KWallet,
909 # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
910 # and presumably searches for its key in the list. It appears that we must do the same.
911 # https://github.com/jaraco/keyring/issues/556
912 with contextlib.closing(secretstorage.dbus_init()) as con:
913 col = secretstorage.get_default_collection(con)
914 for item in col.get_all_items():
86e5f3ed 915 if item.get_label() == f'{browser_keyring_name} Safe Storage':
f59f5ef8
MB
916 return item.get_secret()
917 else:
918 logger.error('failed to read from keyring')
919 return b''
920
921
922def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
923 # note: chrome/chromium can be run with the following flags to determine which keyring backend
924 # it has chosen to use
925 # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
926 # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
927 # will not be sufficient in all cases.
928
2c539d49 929 keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
f59f5ef8
MB
930 logger.debug(f'Chosen keyring: {keyring.name}')
931
2e023649 932 if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
b38d4c94 933 return _get_kwallet_password(browser_keyring_name, keyring, logger)
2e023649 934 elif keyring == _LinuxKeyring.GNOMEKEYRING:
f59f5ef8 935 return _get_gnome_keyring_password(browser_keyring_name, logger)
2e023649 936 elif keyring == _LinuxKeyring.BASICTEXT:
f59f5ef8
MB
937 # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
938 return None
939 assert False, f'Unknown keyring {keyring}'
940
941
942def _get_mac_keyring_password(browser_keyring_name, logger):
943 logger.debug('using find-generic-password to obtain password from OSX keychain')
944 try:
46a5b335 945 stdout, _, returncode = Popen.run(
d3c93ec2 946 ['security', 'find-generic-password',
947 '-w', # write password to stdout
948 '-a', browser_keyring_name, # match 'account'
86e5f3ed 949 '-s', f'{browser_keyring_name} Safe Storage'], # match 'service'
d3c93ec2 950 stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
46a5b335 951 if returncode:
952 logger.warning('find-generic-password failed')
953 return None
f0c9fb96 954 return stdout.rstrip(b'\n')
a44ca5a4 955 except Exception as e:
956 logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
f59f5ef8 957 return None
982ee69a
MB
958
959
960def _get_windows_v10_key(browser_root, logger):
b38d4c94
MB
961 """
962 References:
963 - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
964 """
97ec5bc5 965 path = _find_most_recently_used_file(browser_root, 'Local State', logger)
982ee69a
MB
966 if path is None:
967 logger.error('could not find local state file')
968 return None
97ec5bc5 969 logger.debug(f'Found local state file at "{path}"')
86e5f3ed 970 with open(path, encoding='utf8') as f:
982ee69a
MB
971 data = json.load(f)
972 try:
b38d4c94 973 # kOsCryptEncryptedKeyPrefName in [1]
982ee69a
MB
974 base64_key = data['os_crypt']['encrypted_key']
975 except KeyError:
976 logger.error('no encrypted key in Local State')
977 return None
14f25df2 978 encrypted_key = base64.b64decode(base64_key)
b38d4c94 979 # kDPAPIKeyPrefix in [1]
982ee69a
MB
980 prefix = b'DPAPI'
981 if not encrypted_key.startswith(prefix):
982 logger.error('invalid key')
983 return None
984 return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger)
985
986
987def pbkdf2_sha1(password, salt, iterations, key_length):
988 return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
989
990
b38d4c94
MB
991def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
992 for key in keys:
993 plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
994 try:
995 return plaintext.decode()
996 except UnicodeDecodeError:
997 pass
998 logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
999 return None
982ee69a
MB
1000
1001
1002def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
982ee69a 1003 try:
09906f55 1004 plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
982ee69a 1005 except ValueError:
f440b14f 1006 logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
982ee69a
MB
1007 return None
1008
1009 try:
0f06bcd7 1010 return plaintext.decode()
982ee69a 1011 except UnicodeDecodeError:
f440b14f 1012 logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
982ee69a
MB
1013 return None
1014
1015
1016def _decrypt_windows_dpapi(ciphertext, logger):
1017 """
1018 References:
1019 - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
1020 """
fe0918bb 1021
1022 import ctypes
1023 import ctypes.wintypes
982ee69a
MB
1024
1025 class DATA_BLOB(ctypes.Structure):
fe0918bb 1026 _fields_ = [('cbData', ctypes.wintypes.DWORD),
982ee69a
MB
1027 ('pbData', ctypes.POINTER(ctypes.c_char))]
1028
1029 buffer = ctypes.create_string_buffer(ciphertext)
1030 blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
1031 blob_out = DATA_BLOB()
1032 ret = ctypes.windll.crypt32.CryptUnprotectData(
1033 ctypes.byref(blob_in), # pDataIn
1034 None, # ppszDataDescr: human readable description of pDataIn
1035 None, # pOptionalEntropy: salt?
1036 None, # pvReserved: must be NULL
1037 None, # pPromptStruct: information about prompts to display
1038 0, # dwFlags
1039 ctypes.byref(blob_out) # pDataOut
1040 )
1041 if not ret:
f9be9cb9 1042 logger.warning('failed to decrypt with DPAPI', only_once=True)
982ee69a
MB
1043 return None
1044
1045 result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
1046 ctypes.windll.kernel32.LocalFree(blob_out.pbData)
1047 return result
1048
1049
1050def _config_home():
1051 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
1052
1053
1054def _open_database_copy(database_path, tmpdir):
1055 # cannot open sqlite databases if they are already in use (e.g. by the browser)
1056 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
1057 shutil.copy(database_path, database_copy_path)
1058 conn = sqlite3.connect(database_copy_path)
1059 return conn.cursor()
1060
1061
1062def _get_column_names(cursor, table_name):
86e5f3ed 1063 table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
0f06bcd7 1064 return [row[1].decode() for row in table_info]
982ee69a
MB
1065
1066
97ec5bc5 1067def _find_most_recently_used_file(root, filename, logger):
982ee69a 1068 # if there are multiple browser profiles, take the most recently used one
97ec5bc5 1069 i, paths = 0, []
1070 with _create_progress_bar(logger) as progress_bar:
1071 for curr_root, dirs, files in os.walk(root):
1072 for file in files:
1073 i += 1
1074 progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
1075 if file == filename:
1076 paths.append(os.path.join(curr_root, file))
982ee69a
MB
1077 return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
1078
1079
1080def _merge_cookie_jars(jars):
1081 output_jar = YoutubeDLCookieJar()
1082 for jar in jars:
1083 for cookie in jar:
1084 output_jar.set_cookie(cookie)
1085 if jar.filename is not None:
1086 output_jar.filename = jar.filename
1087 return output_jar
1088
1089
1090def _is_path(value):
1091 return os.path.sep in value
1092
1093
9bd13fe5 1094def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
982ee69a
MB
1095 if browser_name not in SUPPORTED_BROWSERS:
1096 raise ValueError(f'unsupported browser: "{browser_name}"')
f59f5ef8
MB
1097 if keyring not in (None, *SUPPORTED_KEYRINGS):
1098 raise ValueError(f'unsupported keyring: "{keyring}"')
2b24afa6 1099 if profile is not None and _is_path(expand_path(profile)):
1100 profile = expand_path(profile)
9bd13fe5 1101 return browser_name, profile, keyring, container
8817a80d
SS
1102
1103
1104class LenientSimpleCookie(http.cookies.SimpleCookie):
1105 """More lenient version of http.cookies.SimpleCookie"""
1106 # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
36069409
SS
1107 # We use Morsel's legal key chars to avoid errors on setting values
1108 _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
1109 _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
8817a80d
SS
1110
1111 _RESERVED = {
1112 "expires",
1113 "path",
1114 "comment",
1115 "domain",
1116 "max-age",
1117 "secure",
1118 "httponly",
1119 "version",
1120 "samesite",
1121 }
1122
1123 _FLAGS = {"secure", "httponly"}
1124
1125 # Added 'bad' group to catch the remaining value
1126 _COOKIE_PATTERN = re.compile(r"""
1127 \s* # Optional whitespace at start of cookie
1128 (?P<key> # Start of group 'key'
1129 [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
1130 ) # End of group 'key'
1131 ( # Optional group: there may not be a value.
1132 \s*=\s* # Equal Sign
1133 ( # Start of potential value
1134 (?P<val> # Start of group 'val'
1135 "(?:[^\\"]|\\.)*" # Any doublequoted string
1136 | # or
1137 \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
1138 | # or
1139 [""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string
1140 ) # End of group 'val'
1141 | # or
1142 (?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
1143 ) # End of potential value
1144 )? # End of optional value group
1145 \s* # Any number of spaces.
1146 (\s+|;|$) # Ending either at space, semicolon, or EOS.
1147 """, re.ASCII | re.VERBOSE)
1148
1149 def load(self, data):
1150 # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
1151 if not isinstance(data, str):
1152 return super().load(data)
1153
1154 morsel = None
36069409
SS
1155 for match in self._COOKIE_PATTERN.finditer(data):
1156 if match.group('bad'):
8817a80d
SS
1157 morsel = None
1158 continue
1159
36069409 1160 key, value = match.group('key', 'val')
8817a80d 1161
36069409
SS
1162 is_attribute = False
1163 if key.startswith('$'):
1164 key = key[1:]
1165 is_attribute = True
8817a80d
SS
1166
1167 lower_key = key.lower()
1168 if lower_key in self._RESERVED:
1169 if morsel is None:
1170 continue
1171
1172 if value is None:
1173 if lower_key not in self._FLAGS:
1174 morsel = None
1175 continue
1176 value = True
1177 else:
1178 value, _ = self.value_decode(value)
1179
1180 morsel[key] = value
1181
36069409
SS
1182 elif is_attribute:
1183 morsel = None
1184
8817a80d
SS
1185 elif value is not None:
1186 morsel = self.get(key, http.cookies.Morsel())
1187 real_value, coded_value = self.value_decode(value)
1188 morsel.set(key, real_value, coded_value)
1189 self[key] = morsel
1190
1191 else:
1192 morsel = None
b87e01c1 1193
1194
1195class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
1196 """
1197 See [1] for cookie file format.
1198
1199 1. https://curl.haxx.se/docs/http-cookies.html
1200 """
1201 _HTTPONLY_PREFIX = '#HttpOnly_'
1202 _ENTRY_LEN = 7
1203 _HEADER = '''# Netscape HTTP Cookie File
1204# This file is generated by yt-dlp. Do not edit.
1205
1206'''
1207 _CookieFileEntry = collections.namedtuple(
1208 'CookieFileEntry',
1209 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
1210
1211 def __init__(self, filename=None, *args, **kwargs):
1212 super().__init__(None, *args, **kwargs)
1213 if is_path_like(filename):
1214 filename = os.fspath(filename)
1215 self.filename = filename
1216
1217 @staticmethod
1218 def _true_or_false(cndn):
1219 return 'TRUE' if cndn else 'FALSE'
1220
1221 @contextlib.contextmanager
1222 def open(self, file, *, write=False):
1223 if is_path_like(file):
1224 with open(file, 'w' if write else 'r', encoding='utf-8') as f:
1225 yield f
1226 else:
1227 if write:
1228 file.truncate(0)
1229 yield file
1230
1231 def _really_save(self, f, ignore_discard=False, ignore_expires=False):
1232 now = time.time()
1233 for cookie in self:
1234 if (not ignore_discard and cookie.discard
1235 or not ignore_expires and cookie.is_expired(now)):
1236 continue
1237 name, value = cookie.name, cookie.value
1238 if value is None:
1239 # cookies.txt regards 'Set-Cookie: foo' as a cookie
1240 # with no name, whereas http.cookiejar regards it as a
1241 # cookie with no value.
1242 name, value = '', name
1243 f.write('%s\n' % '\t'.join((
1244 cookie.domain,
1245 self._true_or_false(cookie.domain.startswith('.')),
1246 cookie.path,
1247 self._true_or_false(cookie.secure),
1248 str_or_none(cookie.expires, default=''),
1249 name, value
1250 )))
1251
1252 def save(self, filename=None, *args, **kwargs):
1253 """
1254 Save cookies to a file.
1255 Code is taken from CPython 3.6
1256 https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
1257
1258 if filename is None:
1259 if self.filename is not None:
1260 filename = self.filename
1261 else:
1262 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1263
1264 # Store session cookies with `expires` set to 0 instead of an empty string
1265 for cookie in self:
1266 if cookie.expires is None:
1267 cookie.expires = 0
1268
1269 with self.open(filename, write=True) as f:
1270 f.write(self._HEADER)
1271 self._really_save(f, *args, **kwargs)
1272
1273 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
1274 """Load cookies from a file."""
1275 if filename is None:
1276 if self.filename is not None:
1277 filename = self.filename
1278 else:
1279 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1280
1281 def prepare_line(line):
1282 if line.startswith(self._HTTPONLY_PREFIX):
1283 line = line[len(self._HTTPONLY_PREFIX):]
1284 # comments and empty lines are fine
1285 if line.startswith('#') or not line.strip():
1286 return line
1287 cookie_list = line.split('\t')
1288 if len(cookie_list) != self._ENTRY_LEN:
1289 raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
1290 cookie = self._CookieFileEntry(*cookie_list)
1291 if cookie.expires_at and not cookie.expires_at.isdigit():
1292 raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
1293 return line
1294
1295 cf = io.StringIO()
1296 with self.open(filename) as f:
1297 for line in f:
1298 try:
1299 cf.write(prepare_line(line))
1300 except http.cookiejar.LoadError as e:
1301 if f'{line.strip()} '[0] in '[{"':
1302 raise http.cookiejar.LoadError(
1303 'Cookies file must be Netscape formatted, not JSON. See '
1304 'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
1305 write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
1306 continue
1307 cf.seek(0)
1308 self._really_load(cf, filename, ignore_discard, ignore_expires)
1309 # Session cookies are denoted by either `expires` field set to
1310 # an empty string or 0. MozillaCookieJar only recognizes the former
1311 # (see [1]). So we need force the latter to be recognized as session
1312 # cookies on our own.
1313 # Session cookies may be important for cookies-based authentication,
1314 # e.g. usually, when user does not check 'Remember me' check box while
1315 # logging in on a site, some important cookies are stored as session
1316 # cookies so that not recognizing them will result in failed login.
1317 # 1. https://bugs.python.org/issue17164
1318 for cookie in self:
1319 # Treat `expires=0` cookies as session cookies
1320 if cookie.expires == 0:
1321 cookie.expires = None
1322 cookie.discard = True
1323
1324 def get_cookie_header(self, url):
1325 """Generate a Cookie HTTP header for a given url"""
1326 cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
1327 self.add_cookie_header(cookie_req)
1328 return cookie_req.get_header('Cookie')