]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[extractor/nekohacker] Add extractor (#7003)
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
14f25df2 1import base64
b87e01c1 2import collections
f59f5ef8 3import contextlib
54007a45 4import http.cookiejar
8817a80d 5import http.cookies
b87e01c1 6import io
982ee69a
MB
7import json
8import os
9bd13fe5 9import re
982ee69a 10import shutil
982ee69a
MB
11import struct
12import subprocess
13import sys
14import tempfile
2e4585da 15import time
b87e01c1 16import urllib.request
982ee69a 17from datetime import datetime, timedelta, timezone
f59f5ef8 18from enum import Enum, auto
982ee69a
MB
19from hashlib import pbkdf2_hmac
20
1d3586d0 21from .aes import (
22 aes_cbc_decrypt_bytes,
23 aes_gcm_decrypt_and_verify_bytes,
24 unpad_pkcs7,
25)
9b7a48ab 26from .compat import functools
9b8ee23b 27from .dependencies import (
28 _SECRETSTORAGE_UNAVAILABLE_REASON,
29 secretstorage,
30 sqlite3,
31)
97ec5bc5 32from .minicurses import MultilinePrinter, QuietMultilinePrinter
d2c8aadf 33from .utils import (
34 Popen,
d2c8aadf 35 error_to_str,
b87e01c1 36 escape_url,
d2c8aadf 37 expand_path,
22df97f9 38 is_path_like,
b87e01c1 39 sanitize_url,
40 str_or_none,
d2c8aadf 41 try_call,
b87e01c1 42 write_string,
d2c8aadf 43)
982ee69a 44
982ee69a
MB
# Browser names that extract_cookies_from_browser() routes to the shared Chromium extractor.
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name this module knows how to extract cookies from.
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS.union(('firefox', 'safari'))
47
48
class YDLLogger:
    """Logger that forwards messages to a YoutubeDL-like object.

    Every method is a silent no-op when no `ydl` object was supplied.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        if self._ydl:
            self._ydl.write_debug(message)

    def info(self, message):
        if self._ydl:
            self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        if self._ydl:
            self._ydl.report_warning(message, only_once)

    def error(self, message):
        if self._ydl:
            self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        """Printer that rewrites line 0, rate-limited to one update per `_DELAY` seconds."""
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            # Drop messages that arrive too soon after the previous one
            if time.time() - self._timer > self._DELAY:
                self.print_at_line(f'[Cookies] {message}', 0)
                self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
            return
        file = self._ydl._out_files.error
        try:
            if not file.isatty():
                return
        except Exception:
            # A stream without a working isatty() is treated as "not a terminal".
            # KeyboardInterrupt/SystemExit are deliberately allowed to propagate.
            return
        return self.ProgressBar(file, preserve_output=False)
97ec5bc5 89
90
def _create_progress_bar(logger):
    """Return the logger's progress bar, or a silent stand-in when it has none.

    The logger's `progress_bar()` is optional and may return a falsy value; in
    both cases a QuietMultilinePrinter whose `print` does nothing is returned.
    """
    printer = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if printer:
        return printer

    silent = QuietMultilinePrinter()
    silent.print = lambda _: None
    return silent
99
982ee69a
MB
100
def load_cookies(cookie_file, browser_specification, ydl):
    """Build a single cookie jar from a browser specification and/or a cookie file.

    Either source may be None, in which case it is skipped. The collected jars
    (browser jar first, then file jar) are combined by _merge_cookie_jars.
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
        jars.append(browser_jar)

    if cookie_file is not None:
        from_path = is_path_like(cookie_file)
        if from_path:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # A path that is not readable is not loaded, but its (empty) jar is still kept.
        # Anything that is not path-like is always loaded.
        if not from_path or os.access(cookie_file, os.R_OK):
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
119
120
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Extract cookies from the named browser by dispatching to its extractor.

    `container` is only passed on for firefox and `keyring` only for the
    Chromium-based browsers. Raises ValueError for an unknown browser name.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
130
131
def _extract_firefox_cookies(profile, container, logger):
    """Read cookies from a Firefox profile's cookies.sqlite into a YoutubeDLCookieJar.

    @param profile    None (search the default Firefox directory), a path, or a
                      profile name relative to the default Firefox directory
    @param container  None (all cookies), 'none' (only cookies outside any container),
                      or a container name looked up in the profile's containers.json
    @param logger     object providing info/debug/warning (and optionally progress_bar)

    Raises FileNotFoundError if the cookie database or containers.json cannot be found/read
    and ValueError if the requested container does not exist.
    Returns an empty jar (after a warning) when sqlite3 is unavailable.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        # JSON is UTF-8; do not depend on the locale's default encoding
        with open(containers_path, encoding='utf-8') as containers:
            identities = json.load(containers).get('identities', [])

        def container_labels(context):
            # Accept the explicit 'name', the full l10nID ('userContextX.label') and
            # the short name captured from it ('X').
            # NOTE(review): presumably built-in containers only carry an l10nID - confirm
            match = try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']))
            return (context.get('name'), *(match.group(0, 1) if match else ()))

        container_id = next(
            (context.get('userContextId') for context in identities
             if container in container_labels(context)), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # Work on a copy of the database placed in the temporary directory
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                # The ID is either the last originAttributes entry or followed by '&'
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")')
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
198
199
def _firefox_browser_dir():
    """Return the platform's default directory to search for Firefox profiles."""
    platform = sys.platform
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    if platform in ('cygwin', 'win32'):
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    # Everything else uses the Linux-style location
    return os.path.expanduser('~/.mozilla/firefox')
982ee69a
MB
206
207
def _get_chromium_based_browser_settings(browser_name):
    """Return per-browser settings for a Chromium-based browser on this platform.

    The result is a dict with the keys 'browser_dir' (user data directory),
    'keyring_name' (name used for the keyring entry) and 'supports_profiles'.
    Raises KeyError if `browser_name` is not a known Chromium-based browser.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    if sys.platform in ('cygwin', 'win32'):
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        root, subdir = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
        }[browser_name]

    elif sys.platform == 'darwin':
        root = os.path.expanduser('~/Library/Application Support')
        subdir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]

    else:
        root = _config_home()
        subdir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]

    browser_dir = os.path.join(root, subdir)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = sys.platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }[browser_name]

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in {'opera'},
    }
262
263
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read cookies from a Chromium-based browser's 'Cookies' database into a YoutubeDLCookieJar.

    @param browser_name  one of the Chromium-based browser names
    @param profile       None, a path, or a profile name inside the browser directory
    @param keyring       passed through to get_cookie_decryptor
    @param logger        object providing info/debug/warning/error

    Raises FileNotFoundError if no cookie database is found.
    Returns an empty jar (after a warning) when sqlite3 is unavailable.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    has_profiles = settings['supports_profiles']

    if profile is None:
        search_root = settings['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # The browser directory handed to the decryptor follows the explicit profile path
        settings['browser_dir'] = os.path.dirname(profile) if has_profiles else profile
    elif has_profiles:
        search_root = os.path.join(settings['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = settings['browser_dir']

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(settings['browser_dir'], settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Rows are fetched as raw bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            # The secure flag column is called either 'is_secure' or 'secure'
            secure_column = 'is_secure' if 'is_secure' in _get_column_names(cursor, 'cookies') else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            undecryptable = 0
            plaintext = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                row_count = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{row_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        undecryptable += 1
                        continue
                    if not is_encrypted:
                        plaintext += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({undecryptable} could not be decrypted)' if undecryptable > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = plaintext
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
328
329
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Convert one row of a Chromium cookies table into a cookie object.

    The text columns arrive as bytes and are decoded here. A row counts as
    encrypted when its plain `value` is empty and `encrypted_value` is not;
    only then is `decryptor.decrypt` consulted.

    Returns a tuple (is_encrypted, cookie) where `is_encrypted` is a bool and
    `cookie` is an http.cookiejar.Cookie, or None if decryption failed.
    """
    host_key = host_key.decode()
    name = name.decode()
    value = value.decode()
    path = path.decode()
    # bool() so that the flag is a real boolean rather than the raw encrypted bytes
    is_encrypted = bool(not value and encrypted_value)

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    return is_encrypted, http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})
347
348
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class of the per-platform Chromium cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Per-version tally of the cookies seen; subclasses replace it per instance
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt a cookie value; every subclass has to provide this."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 379
982ee69a 380
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the ChromeCookieDecryptor matching the current platform.

    `browser_root` is only used on Windows, `browser_keyring_name` on Mac and
    Linux, and `keyring` only on Linux (any platform that is neither darwin
    nor win32/cygwin).
    """
    platform = sys.platform
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
982ee69a
MB
387
388
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts v10 (fixed key) and v11 (keyring key) cookies; anything else is rejected."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._v10_key = self.derive_key(b'peanuts')
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring

    @functools.cached_property
    def _v11_key(self):
        # Looked up on first use only, so the keyring is not queried unless a v11 cookie is seen
        password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        if password is None:
            return None
        return self.derive_key(password)

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value, or None if it cannot be decrypted."""
        # The first three bytes name the encryption version
        prefix, payload = encrypted_value[:3], encrypted_value[3:]

        if prefix == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(payload, self._v10_key, self._logger)

        if prefix == b'v11':
            self._cookie_counts['v11'] += 1
            key = self._v11_key
            if key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(payload, key, self._logger)

        self._cookie_counts['other'] += 1
        return None
426
427
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts v10 cookies with a keyring-derived key; other values are passed through."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        if password is None:
            self._v10_key = None
        else:
            self._v10_key = self.derive_key(password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted value, None if the v10 key is missing, or the input for non-v10 data."""
        prefix, payload = encrypted_value[:3], encrypted_value[3:]

        if prefix != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(payload, self._v10_key, self._logger)
458
459
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts v10 cookies with AES-GCM and everything else with DPAPI."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value, or None if it cannot be decrypted."""
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the version prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): _decrypt_windows_dpapi is defined outside this view; guard in case
            # it signals failure by returning None, so that the cookie is counted as
            # undecryptable instead of raising AttributeError on .decode()
            return None if plaintext is None else plaintext.decode()
982ee69a
MB
495
496
def _extract_safari_cookies(profile, logger):
    """Read Safari's Cookies.binarycookies file into a cookie jar (macOS only).

    A truthy `profile` is used as the path of the cookie file. Otherwise the
    default location is tried, followed by the location inside Safari's container.

    Raises ValueError on any platform other than darwin and FileNotFoundError
    if no cookie file exists.
    """
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    if profile:
        cookies_path = os.path.expanduser(profile)
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('custom safari cookies database not found')
    else:
        cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            logger.debug('Trying secondary cookie location')
            cookies_path = os.path.expanduser(
                '~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
            if not os.path.isfile(cookies_path):
                raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookies_file:
        raw = cookies_file.read()

    jar = parse_safari_cookies(raw, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
521
522
class ParserError(Exception):
    """Raised by DataParser for invalid reads/skips, truncated input or unexpected bytes."""
525
526
class DataParser:
    """Sequential reader over a bytes buffer.

    `cursor` is the offset of the next unread byte. All reads advance it and
    raise ParserError instead of reading past the end of the data.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next `num_bytes` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        """Consume len(expected_value) bytes; raise ParserError unless they equal `expected_value`."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless `big_endian`)."""
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little endian unless `big_endian`)."""
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        """Read a NUL-terminated string and return it decoded; the terminator is consumed.

        Raises ParserError if no terminator is found before the end of the data
        and UnicodeDecodeError if the bytes cannot be decoded.
        """
        # Locate the terminator in a single scan instead of reading byte by byte
        terminator = self._data.find(b'\x00', self.cursor)
        if terminator < 0:
            # Same end state as consuming every remaining byte before failing
            self.cursor = len(self._data)
            raise ParserError('reached end of input')
        raw = self._data[self.cursor:terminator]
        # The cursor moves past the terminator even if decoding fails below
        self.cursor = terminator + 1
        return raw.decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume `num_bytes` bytes, logging what was skipped; zero bytes is a no-op."""
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        """Skip forward to the absolute `offset`; moving backwards raises ParserError."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the buffer."""
        self.skip_to(len(self._data), description)
576
577
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
580
581
def _parse_safari_cookies_header(data, logger):
    """Parse the file header; return (list of page sizes, offset at which the header ends)."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    # The page count and the page sizes are big endian
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
588
589
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of the Safari cookie file, adding every cookie record in it to `jar`."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record parser works on its own view; advance this parser by the size it reports
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
608
609
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of `data`, add it to `jar` and return the record size.

    A record whose strings cannot be decoded is skipped with a warning; its
    size is still returned.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    # Offsets of the four strings, relative to the start of the record
    string_offsets = [parser.read_uint() for _ in range(4)]  # domain, name, path, value
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # The creation date is read (and converted) but not used
    _mac_absolute_time_to_posix(parser.read_double())

    try:
        strings = []
        for offset in string_offsets:
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
        domain, name, path, value = strings
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
650
651
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari Cookies.binarycookies file into `jar`
    (a new YoutubeDLCookieJar if none is given) and return the jar.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
667
668
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished when choosing a keyring backend.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    OTHER = auto()  # anything that is not recognised
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
982ee69a
MB
681
682
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends a browser password can be read from.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()


# Names of the keyring backends, in declaration order
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
694
695
def _get_linux_desktop_environment(env):
    """
    Work out the desktop environment from a mapping of environment variables.

    XDG_CURRENT_DESKTOP (first ':'-separated entry) is checked first, then
    DESKTOP_SESSION; the GNOME/KDE marker variables are only consulted when
    DESKTOP_SESSION is not set.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    session = env.get('DESKTOP_SESSION', None)
    if current_desktop is not None:
        current_desktop = current_desktop.split(':')[0].strip()

    if current_desktop == 'Unity':
        if session is not None and 'gnome-fallback' in session:
            return _LinuxDesktopEnvironment.GNOME
        return _LinuxDesktopEnvironment.UNITY

    by_current_desktop = {
        'GNOME': _LinuxDesktopEnvironment.GNOME,
        'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
        'KDE': _LinuxDesktopEnvironment.KDE,
        'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
        'XFCE': _LinuxDesktopEnvironment.XFCE,
    }
    if current_desktop in by_current_desktop:
        return by_current_desktop[current_desktop]

    if session is not None:
        if session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in session:
            return _LinuxDesktopEnvironment.XFCE
    elif 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE

    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
734
735
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the detected desktop environment:
    KDE uses KWALLET, an unrecognised environment BASICTEXT, everything else GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
750
751
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Asks kwalletd5 over dbus; falls back to 'kdewallet' (with a warning) when
    the command fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet'
    ]
    try:
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        wallet = stdout.strip()
        logger.debug(f'NetworkWallet = "{wallet}"')
        return wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
779
780
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the '<browser_keyring_name> Safe Storage' password from KWallet using kwallet-query.

    Returns the password as bytes. An empty bytes object is returned when
    kwallet-query is missing, fails, raises, or reports that the entry could
    not be read.
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this may be a bug as the intended behaviour is to generate a random password and store
                # it, but that doesn't matter here.
                return b''
            else:
                logger.debug('password found')
                return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
821
822
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Look up the secret labelled "<browser_keyring_name> Safe Storage" in the
    default secretstorage collection.

    Returns the secret of the first item with that label, or b'' when
    secretstorage is unavailable or no item matches.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    wanted_label = f'{browser_keyring_name} Safe Storage'
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        matching = (entry for entry in collection.get_all_items()
                    if entry.get_label() == wanted_label)
        found = next(matching, None)
        if found is not None:
            return found.get_secret()
        logger.error('failed to read from keyring')
        return b''
839
840
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Fetch the browser's keyring password from the given Linux keyring backend.

    `keyring` is a `_LinuxKeyring` member name; when falsy the backend is
    picked by `_choose_linux_keyring`. Returns None for the basic-text backend.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        backend = _LinuxKeyring[keyring]
    else:
        backend = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {backend.name}')

    if backend == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if backend == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if backend == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {backend}'
859
860
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Ask the `security` tool for the "<browser_keyring_name> Safe Storage"
    generic password.

    Returns the tool's stdout (bytes, trailing newlines removed), or None if
    the command exits non-zero or raises.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        output, _, exit_status = Popen.run(
            [
                'security', 'find-generic-password',
                '-w',  # write password to stdout
                '-a', browser_keyring_name,  # match 'account'
                '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
            ],
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if not exit_status:
            return output.rstrip(b'\n')
        logger.warning('find-generic-password failed')
        return None
    except Exception as err:
        logger.warning(f'exception running find-generic-password: {error_to_str(err)}')
        return None
982ee69a
MB
877
878
def _get_windows_v10_key(browser_root, logger):
    """
    Locate the most recently used "Local State" file below `browser_root`,
    read `os_crypt.encrypted_key` from it and decrypt the key with DPAPI.

    Returns None (after logging an error) when the file or the key is missing
    or the decoded key lacks the b'DPAPI' prefix.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    marker = b'DPAPI'
    wrapped_key = base64.b64decode(encoded_key)
    if wrapped_key.startswith(marker):
        # the marker itself is not part of the DPAPI blob
        return _decrypt_windows_dpapi(wrapped_key[len(marker):], logger)

    logger.error('invalid key')
    return None
898
899
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a `key_length`-byte key with PBKDF2 using HMAC-SHA1"""
    return pbkdf2_hmac(
        hash_name='sha1',
        password=password,
        salt=salt,
        iterations=iterations,
        dklen=key_length,
    )
902
903
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt `ciphertext` with AES-CBC, strip the PKCS#7 padding and decode
    the result as UTF-8.

    Returns the decoded text, or None (after a one-time warning) when the
    plaintext is not valid UTF-8.
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    raw = unpad_pkcs7(padded)
    try:
        decoded = raw.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    else:
        return decoded
911
912
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and authenticate `ciphertext` with AES-GCM, then decode it as UTF-8.

    Returns the decoded text, or None (after a one-time warning) when the
    tag verification or the UTF-8 decoding fails.
    """
    # The two steps stay in separate try blocks: UnicodeDecodeError is itself
    # a ValueError and must not be reported as a MAC failure
    try:
        raw = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        decoded = raw.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return decoded
925
926
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` with the Windows CryptUnprotectData function.

    Returns the decrypted bytes, or None (after a one-time warning) when the
    call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', ctypes.wintypes.DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()

    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob),  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output out before handing the buffer back with LocalFree
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
959
960
def _config_home():
    """Return $XDG_CONFIG_HOME if it is set, else the expanded '~/.config'"""
    fallback = os.path.expanduser('~/.config')
    configured = os.environ.get('XDG_CONFIG_HOME')
    return fallback if configured is None else configured
963
964
def _open_database_copy(database_path, tmpdir):
    """
    Copy the database to 'temporary.sqlite' inside `tmpdir`, connect to the
    copy and return a cursor on that connection.
    """
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
971
972
def _get_column_names(cursor, table_name):
    """
    Return the column names of `table_name` as a list of str.

    The names are taken from the second field of each `PRAGMA table_info` row
    and decoded, so the cursor has to yield that field as bytes.
    """
    column_names = []
    for column_info in cursor.execute(f'PRAGMA table_info({table_name})').fetchall():
        column_names.append(column_info[1].decode())
    return column_names
982ee69a
MB
976
977
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk `root` and return the path of the file named `filename` with the
    newest modification time, or None if there is no such file.
    """
    # if there are multiple browser profiles, take the most recently used one
    candidates = []
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry != filename:
                    continue
                candidates.append(os.path.join(directory, entry))

    if not candidates:
        return None

    def modified_at(candidate):
        return os.lstat(candidate).st_mtime

    return max(candidates, key=modified_at)
989
990
def _merge_cookie_jars(jars):
    """
    Combine the cookies of all `jars` into a new YoutubeDLCookieJar.

    The result takes the filename of the last jar that has one.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        if source_jar.filename is None:
            continue
        merged.filename = source_jar.filename
    return merged
999
1000
def _is_path(value):
    """Whether `value` contains the platform's path separator"""
    separator = os.path.sep
    return separator in value
1003
1004
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification and return it as the tuple
    (browser_name, profile, keyring, container).

    A profile that looks like a path after expansion is returned expanded.
    Raises ValueError for an unsupported browser or keyring.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    accepted_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in accepted_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        expanded_profile = expand_path(profile)
        if _is_path(expanded_profile):
            profile = expanded_profile

    return browser_name, profile, keyring, container
8817a80d
SS
1013
1014
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """
    http.cookies.SimpleCookie variant whose string parser skips over invalid
    cookies instead of giving up on the whole input
    """
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    # Keys that are attributes of a cookie rather than cookies themselves
    _RESERVED = {
        'expires',
        'path',
        'comment',
        'domain',
        'max-age',
        'secure',
        'httponly',
        'version',
        'samesite',
    }

    # Reserved keys that are allowed to appear without a value
    _FLAGS = {'secure', 'httponly'}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """
        Load cookies from `data`.

        Non-string input is handed to SimpleCookie.load unchanged. Strings are
        parsed leniently: an invalid entry is dropped together with the
        attributes that follow it, and parsing resumes at the next cookie.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # The cookie that subsequent attributes attach to; None while skipping
        current = None
        for found in self._COOKIE_PATTERN.finditer(data):
            if found.group('bad'):
                current = None
                continue

            name = found.group('key')
            raw_value = found.group('val')

            dollar_prefixed = name.startswith('$')
            if dollar_prefixed:
                name = name[1:]

            attribute = name.lower()
            if attribute not in self._RESERVED:
                if dollar_prefixed or raw_value is None:
                    # unknown `$attribute`, or a bare name without a value
                    current = None
                    continue
                current = self.get(name, http.cookies.Morsel())
                decoded, coded = self.value_decode(raw_value)
                current.set(name, decoded, coded)
                self[name] = current
                continue

            # Reserved key: only meaningful while a cookie is being built
            if current is None:
                continue

            if raw_value is not None:
                current[name] = self.value_decode(raw_value)[0]
            elif attribute in self._FLAGS:
                current[name] = True
            else:
                # a valueless attribute that is not a flag invalidates the cookie context
                current = None
b87e01c1 1104
1105
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar that can also read from and write to already opened
    file objects, skips malformed entries while loading and treats
    `expires=0` entries as session cookies.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a cookie file entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """
        @param filename  path-like object or file object used by load()/save()
                         when they are called without a filename
        """
        # The parent is initialised without a filename; path-likes are
        # normalised with os.fspath and anything else is stored as given
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Return the cookie file spelling ('TRUE'/'FALSE') of a condition"""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text file object for `file`.

        A path-like `file` is opened (UTF-8) and closed again on exit.
        Any other object is assumed to be a file object and is yielded as-is
        (and left open); it is emptied first when `write` is set.
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() does not move the stream position. Rewind first so
                # that the new content starts at offset 0 instead of being
                # written (padded) at wherever the last read/write stopped
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        """Write one tab-separated line per cookie to the file object `f`"""
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        @param filename  path-like or file object; defaults to self.filename
        Remaining arguments are passed on to _really_save.
        Raises ValueError if no filename is given and none is set on the jar.
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        # NOTE(review): this rewrites `expires` on the cookies held by the jar
        # itself, not only in the written file
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Load cookies from a file.

        Entries that are malformed are skipped with a warning. A malformed
        line that looks like JSON aborts loading with LoadError.

        @param filename  path-like or file object; defaults to self.filename
        Raises ValueError if no filename is given and none is set on the jar.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the '#HttpOnly_' marker and validate the entry;
            # raises LoadError for entries that cannot be used
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # The sanitized lines are collected in memory and parsed from there
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # the appended space guards against indexing an empty string
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See  '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')