]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[cookies] Move `YoutubeDLCookieJar` to cookies module (#7091)
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
14f25df2 1import base64
b87e01c1 2import collections
f59f5ef8 3import contextlib
54007a45 4import http.cookiejar
8817a80d 5import http.cookies
b87e01c1 6import io
982ee69a
MB
7import json
8import os
9bd13fe5 9import re
982ee69a 10import shutil
982ee69a
MB
11import struct
12import subprocess
13import sys
14import tempfile
2e4585da 15import time
b87e01c1 16import urllib.request
982ee69a 17from datetime import datetime, timedelta, timezone
f59f5ef8 18from enum import Enum, auto
982ee69a
MB
19from hashlib import pbkdf2_hmac
20
1d3586d0 21from .aes import (
22 aes_cbc_decrypt_bytes,
23 aes_gcm_decrypt_and_verify_bytes,
24 unpad_pkcs7,
25)
9b7a48ab 26from .compat import functools
9b8ee23b 27from .dependencies import (
28 _SECRETSTORAGE_UNAVAILABLE_REASON,
29 secretstorage,
30 sqlite3,
31)
97ec5bc5 32from .minicurses import MultilinePrinter, QuietMultilinePrinter
d2c8aadf 33from .utils import (
34 Popen,
d2c8aadf 35 error_to_str,
b87e01c1 36 escape_url,
d2c8aadf 37 expand_path,
22df97f9 38 is_path_like,
b87e01c1 39 sanitize_url,
40 str_or_none,
d2c8aadf 41 try_call,
b87e01c1 42 write_string,
d2c8aadf 43)
982ee69a 44
982ee69a
MB
# Browser names that are routed through the Chromium cookie extraction path
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name that extract_cookies_from_browser knows how to handle
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS.union({'firefox', 'safari'})
47
48
class YDLLogger:
    """Logger that forwards messages to a YoutubeDL-like object.

    When constructed without a ``ydl`` object every logging method is a no-op.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward a debug message to ``ydl.write_debug``."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Print the message to screen with a ``[Cookies]`` prefix."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward a warning (and the ``only_once`` flag) to ``ydl.report_warning``."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward an error message to ``ydl.report_error``."""
        if not self._ydl:
            return
        self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        # Minimum number of seconds between two printed updates, and the time of the last one
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            """Print the message on line 0, dropping updates that arrive within ``_DELAY``."""
            if time.time() - self._timer <= self._DELAY:
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        ydl = self._ydl
        if not ydl:
            return None
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None
        file = ydl._out_files.error
        try:
            is_terminal = file.isatty()
        except BaseException:
            return None
        if not is_terminal:
            return None
        return self.ProgressBar(file, preserve_output=False)
97ec5bc5 89
90
91def _create_progress_bar(logger):
92 if hasattr(logger, 'progress_bar'):
93 printer = logger.progress_bar()
94 if printer:
95 return printer
96 printer = QuietMultilinePrinter()
97 printer.print = lambda _: None
98 return printer
99
982ee69a
MB
100
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser specification and/or a cookie file.

    Either argument may be None, in which case that source is skipped. The
    collected jars are combined with _merge_cookie_jars.
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
        jars.append(browser_jar)

    if cookie_file is not None:
        is_filename = is_path_like(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # Anything that is not a path is always loaded; a path only when it is readable
        should_load = not is_filename or os.access(cookie_file, os.R_OK)
        if should_load:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
119
120
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch to the extractor matching ``browser_name``.

    ``container`` is only passed to the firefox extractor and ``keyring`` only to
    the Chromium one. Raises ValueError for an unknown browser name.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
130
131
def _extract_firefox_cookies(profile, container, logger):
    """Read cookies from a Firefox ``cookies.sqlite`` database into a cookie jar.

    profile:   None to search the default Firefox directory, a path to search that
               path directly, or a name that is joined onto the default directory.
    container: None to load every cookie, 'none' to load only cookies without a
               userContextId, otherwise a container name looked up in containers.json.
               A container is matched by its "name", by its full l10nID
               (e.g. "userContextPersonal.label") or by the name embedded in that
               l10nID (e.g. "Personal").
    logger:    object providing info/warning/debug.

    Returns a YoutubeDLCookieJar (empty if sqlite3 support is missing).
    Raises FileNotFoundError if the database or containers.json cannot be found/read
    and ValueError if the requested container is not listed.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        # JSON text is UTF-8; do not depend on the locale's default encoding
        with open(containers_path, encoding='utf-8') as containers:
            identities = json.load(containers).get('identities', [])
        l10n_pattern = r'userContext([^\.]+)\.label'
        container_id = next((context.get('userContextId') for context in identities if container in (
            context.get('name'),
            # Full l10nID, kept for backward compatibility
            try_call(lambda: re.fullmatch(l10n_pattern, context['l10nID']).group()),
            # Bare container name captured from the l10nID
            try_call(lambda: re.fullmatch(l10n_pattern, context['l10nID']).group(1)),
        )), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # Bind the needle as a parameter: a double-quoted string literal is only
                # accepted by SQLite as a compatibility quirk
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes, ?)',
                    ('userContextId=',))
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            # Close the connection even if reading failed part-way through
            if cursor is not None:
                cursor.connection.close()
198
199
200def _firefox_browser_dir():
dec30912 201 if sys.platform in ('cygwin', 'win32'):
19a03940 202 return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
982ee69a
MB
203 elif sys.platform == 'darwin':
204 return os.path.expanduser('~/Library/Application Support/Firefox')
dec30912 205 return os.path.expanduser('~/.mozilla/firefox')
982ee69a
MB
206
207
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a Chromium browser.

    The result is a dict with the keys 'browser_dir', 'keyring_name' and
    'supports_profiles'. An unknown ``browser_name`` raises KeyError.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('cygwin', 'win32'):
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        locations = {
            'brave': (appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (appdata_local, R'Google\Chrome\User Data'),
            'chromium': (appdata_local, R'Chromium\User Data'),
            'edge': (appdata_local, R'Microsoft\Edge\User Data'),
            'opera': (appdata_roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (appdata_local, R'Vivaldi\User Data'),
        }
    elif platform == 'darwin':
        appdata = os.path.expanduser('~/Library/Application Support')
        locations = {
            'brave': (appdata, 'BraveSoftware/Brave-Browser'),
            'chrome': (appdata, 'Google/Chrome'),
            'chromium': (appdata, 'Chromium'),
            'edge': (appdata, 'Microsoft Edge'),
            'opera': (appdata, 'com.operasoftware.Opera'),
            'vivaldi': (appdata, 'Vivaldi'),
        }
    else:
        config = _config_home()
        locations = {
            'brave': (config, 'BraveSoftware/Brave-Browser'),
            'chrome': (config, 'google-chrome'),
            'chromium': (config, 'chromium'),
            'edge': (config, 'microsoft-edge'),
            'opera': (config, 'opera'),
            'vivaldi': (config, 'vivaldi'),
        }
    base_dir, relative_dir = locations[browser_name]
    browser_dir = os.path.join(base_dir, relative_dir)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = platform == 'darwin'
    keyring_names = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_names[browser_name],
        'supports_profiles': browser_name not in browsers_without_profiles
    }
262
263
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt the cookies of a Chromium based browser into a cookie jar.

    profile: None to search the browser's default directory, a path to search
             directly, or a profile name joined onto the default directory.
    keyring: passed on to get_cookie_decryptor.

    Returns a YoutubeDLCookieJar (empty if sqlite3 support is missing) and raises
    FileNotFoundError if no 'Cookies' database is found.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    has_profiles = settings['supports_profiles']

    if profile is None:
        search_root = settings['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # The decryptor below is given the browser root, so keep it in sync with the custom path
        settings['browser_dir'] = os.path.dirname(profile) if has_profiles else profile
    elif has_profiles:
        search_root = os.path.join(settings['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = settings['browser_dir']

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(settings['browser_dir'], settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Text columns are returned as bytes and decoded per cookie
            cursor.connection.text_factory = bytes
            columns = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in columns else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                row_count = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{row_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            # Close the connection even if reading failed part-way through
            if cursor is not None:
                cursor.connection.close()
328
329
97ec5bc5 330def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
0f06bcd7 331 host_key = host_key.decode()
332 name = name.decode()
333 value = value.decode()
334 path = path.decode()
97ec5bc5 335 is_encrypted = not value and encrypted_value
336
337 if is_encrypted:
338 value = decryptor.decrypt(encrypted_value)
339 if value is None:
340 return is_encrypted, None
341
ac668111 342 return is_encrypted, http.cookiejar.Cookie(
97ec5bc5 343 version=0, name=name, value=value, port=None, port_specified=False,
344 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
345 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
346 comment=None, comment_url=None, rest={})
347
348
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Base class of the per-platform Chromium cookie decryptors.

    Overview of the encryption schemes:

        Linux:
        - a cookie is either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - where the v11 key is stored depends on the active desktop environment [2]

        Mac:
        - a cookie is either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - a cookie is either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Per-version counters; each subclass replaces this with its own dict in __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one encrypted cookie value; subclasses must override this."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 379
982ee69a 380
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the ChromeCookieDecryptor subclass matching the current platform.

    ``browser_root`` is only used on Windows and ``keyring`` only on Linux.
    """
    platform = sys.platform
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
982ee69a
MB
387
388
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the 'v10' and 'v11' cookie values handled on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring
        self._logger = logger
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        # The v10 key is derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')

    @functools.cached_property
    def _v11_key(self):
        # Looked up on first use only, so the keyring is not queried unless a v11 cookie is seen
        keyring_password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        if keyring_password is None:
            return None
        return self.derive_key(keyring_password)

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from a password."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a value according to its 3 byte version prefix; None if that is not possible."""
        prefix, payload = encrypted_value[:3], encrypted_value[3:]

        if prefix not in (b'v10', b'v11'):
            self._cookie_counts['other'] += 1
            return None

        if prefix == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(payload, self._v10_key, self._logger)

        self._cookie_counts['v11'] += 1
        key = self._v11_key
        if key is None:
            self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(payload, key, self._logger)
426
427
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the 'v10' and plaintext cookie values handled on macOS."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        self._cookie_counts = {'v10': 0, 'other': 0}
        keyring_password = _get_mac_keyring_password(browser_keyring_name, logger)
        if keyring_password is None:
            self._v10_key = None
        else:
            self._v10_key = self.derive_key(keyring_password)

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from a password."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a 'v10' value (None without a key); any other value is returned unchanged."""
        prefix, payload = encrypted_value[:3], encrypted_value[3:]

        if prefix != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(payload, self._v10_key, self._logger)
458
459
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the 'v10' (AES-GCM) and DPAPI cookie values handled on Windows."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value.

        Values prefixed with b'v10' are decrypted with AES-GCM using the v10 key;
        everything else is handed to DPAPI. Returns None when the value cannot be
        decrypted.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the version prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            decrypted = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): the DPAPI helper is defined outside this block; guard in case it
            # reports failure as None (the convention used by decrypt itself) rather than
            # crashing on None.decode()
            return None if decrypted is None else decrypted.decode()
982ee69a
MB
495
496
497def _extract_safari_cookies(profile, logger):
498 if profile is not None:
499 logger.error('safari does not support profiles')
500 if sys.platform != 'darwin':
86e5f3ed 501 raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a
MB
502
503 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
504
505 if not os.path.isfile(cookies_path):
1f7db853
MP
506 logger.debug('Trying secondary cookie location')
507 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
508 if not os.path.isfile(cookies_path):
509 raise FileNotFoundError('could not find safari cookies database')
982ee69a
MB
510
511 with open(cookies_path, 'rb') as f:
512 cookies_data = f.read()
513
514 jar = parse_safari_cookies(cookies_data, logger=logger)
86e5f3ed 515 logger.info(f'Extracted {len(jar)} cookies from safari')
982ee69a
MB
516 return jar
517
518
class ParserError(Exception):
    """Raised by DataParser when the input cannot be read as requested."""
521
522
class DataParser:
    """Sequential reader over a bytes buffer that keeps track of a cursor position."""

    def __init__(self, data, logger):
        self._data = data
        self._logger = logger
        self.cursor = 0

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Read as many bytes as ``expected_value`` has and raise ParserError if they differ."""
        actual = self.read_bytes(len(expected_value))
        if actual != expected_value:
            raise ParserError(f'unexpected value: {actual} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless ``big_endian``)."""
        byte_order = 'big' if big_endian else 'little'
        return int.from_bytes(self.read_bytes(4), byte_order)

    def read_double(self, big_endian=False):
        """Read an 8 byte double (little endian unless ``big_endian``)."""
        (number,) = struct.unpack('>d' if big_endian else '<d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read up to and including the next NUL byte and return the decoded text before it."""
        pieces = []
        # read_bytes raises ParserError if the input ends before a NUL byte is found
        for byte in iter(lambda: self.read_bytes(1), b'\x00'):
            pieces.append(byte)
        return b''.join(pieces).decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance the cursor by ``num_bytes``, logging the skipped bytes at debug level."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes == 0:
            return
        skipped = self.read_bytes(num_bytes)
        self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped!r}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor ends up at the absolute ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the buffer."""
        self.skip_to(len(self._data), description)
572
573
574def _mac_absolute_time_to_posix(timestamp):
575 return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
576
577
def _parse_safari_cookies_header(data, logger):
    """Parse the file header and return ``(page_sizes, offset_where_the_header_ends)``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
584
585
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of the cookies file and add every record it contains to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = []
    for _ in range(number_of_cookies):
        record_offsets.append(parser.read_uint())
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record parser reports its size, which is then consumed from the page
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
604
605
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of ``data``, add it to ``jar`` and return its size.

    A record whose strings cannot be decoded is skipped with a warning; its size
    is still returned.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    # Offsets of the domain, name, path and value strings, in that order
    string_offsets = [parser.read_uint() for _ in range(4)]
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    strings = []
    try:
        for string_offset in string_offsets:
            parser.skip_to(string_offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
646
647
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari cookies file into ``jar`` (a new jar if None).

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
663
664
f59f5ef8
MB
665class _LinuxDesktopEnvironment(Enum):
666 """
667 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
668 DesktopEnvironment
669 """
670 OTHER = auto()
671 CINNAMON = auto()
672 GNOME = auto()
673 KDE = auto()
674 PANTHEON = auto()
675 UNITY = auto()
676 XFCE = auto()
982ee69a
MB
677
678
f59f5ef8
MB
679class _LinuxKeyring(Enum):
680 """
681 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
682 SelectedLinuxBackend
683 """
684 KWALLET = auto()
685 GNOMEKEYRING = auto()
686 BASICTEXT = auto()
687
688
689SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
690
691
def _get_linux_desktop_environment(env):
    """
    Guess the desktop environment from a mapping of environment variables.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    session = env.get('DESKTOP_SESSION', None)

    if current_desktop is not None:
        # Only the first entry of the colon separated list is considered
        first_entry = current_desktop.split(':')[0].strip()
        if first_entry == 'Unity':
            is_gnome_fallback = session is not None and 'gnome-fallback' in session
            if is_gnome_fallback:
                return _LinuxDesktopEnvironment.GNOME
            return _LinuxDesktopEnvironment.UNITY
        by_name = {
            'GNOME': _LinuxDesktopEnvironment.GNOME,
            'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
            'KDE': _LinuxDesktopEnvironment.KDE,
            'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
            'XFCE': _LinuxDesktopEnvironment.XFCE,
        }
        if first_entry in by_name:
            return by_name[first_entry]
    elif session is not None:
        if session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in session:
            return _LinuxDesktopEnvironment.XFCE
    else:
        if 'GNOME_DESKTOP_SESSION_ID' in env:
            return _LinuxDesktopEnvironment.GNOME
        if 'KDE_FULL_SESSION' in env:
            return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
730
731
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend based on the detected desktop environment.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop.name}')
    if desktop == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
746
747
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Falls back to 'kdewallet' when the dbus query fails.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet'
    ]
    try:
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        logger.debug(f'NetworkWallet = "{stdout.strip()}"')
        return stdout.strip()
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
775
776
def _get_kwallet_password(browser_keyring_name, logger):
    """
    Read the browser's "Safe Storage" password from KWallet using `kwallet-query`.

    The entry "<browser_keyring_name> Safe Storage" is looked up in the folder
    "<browser_keyring_name> Keys" of the wallet reported by
    `_get_kwallet_network_wallet`.

    Returns the password as bytes with trailing newlines stripped, or b'' if
    `kwallet-query` is not installed, exits with a non-zero status, reports
    that it failed to read the entry, or raises while being run.
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # every fragment but the last must end with a space, since the
        # fragments are joined by implicit string concatenation
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this may be a bug as the intended behaviour is to generate a random password and store
            # it, but that doesn't matter here.
            return b''

        logger.debug('password found')
        return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
817
818
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Look up the "<browser_keyring_name> Safe Storage" secret in the default
    secretstorage collection.

    Returns the secret of the first item with a matching label, or b'' if
    secretstorage is unavailable or no item matches.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'

    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for entry in collection.get_all_items():
            if entry.get_label() == wanted_label:
                return entry.get_secret()

        # no item carried the expected label
        logger.error('failed to read from keyring')
        return b''
835
836
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Fetch the browser's keyring password from the selected Linux keyring backend.

    `keyring` is the name of a `_LinuxKeyring` member; when it is falsy the
    backend is picked by `_choose_linux_keyring`. Returns whatever the backend
    helper returns, or None for the basic-text backend.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        backend = _LinuxKeyring[keyring]
    else:
        backend = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {backend.name}')

    if backend == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if backend == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if backend == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {backend}'
855
856
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the browser's "Safe Storage" password by running
    `security find-generic-password`.

    Returns the password as bytes with trailing newlines stripped, or None if
    the command exits with a non-zero status or running it raises.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    command = [
        'security', 'find-generic-password',
        '-w',  # write password to stdout
        '-a', browser_keyring_name,  # match 'account'
        '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
    ]
    try:
        output, _, exit_code = Popen.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if not exit_code:
            return output.rstrip(b'\n')
        logger.warning('find-generic-password failed')
        return None
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
873
874
def _get_windows_v10_key(browser_root, logger):
    """
    Extract and decrypt the key stored in the browser's "Local State" file.

    The most recently used "Local State" file below `browser_root` is parsed as
    JSON, `os_crypt.encrypted_key` is base64-decoded, its `DPAPI` prefix is
    stripped and the remainder is passed to `_decrypt_windows_dpapi`.

    Returns the result of `_decrypt_windows_dpapi`, or None if the file is
    missing, the key is absent, or the prefix is not present.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    wrapped_key = base64.b64decode(encoded_key)
    marker = b'DPAPI'
    if wrapped_key[:len(marker)] != marker:
        logger.error('invalid key')
        return None
    return _decrypt_windows_dpapi(wrapped_key[len(marker):], logger)
894
895
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a `key_length`-byte key from `password` and `salt` using PBKDF2 with HMAC-SHA1"""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
898
899
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt `ciphertext` with AES-CBC, strip the PKCS#7 padding and decode the
    result to str.

    Returns the decoded text, or None (after a one-time warning) if the
    plaintext cannot be decoded.
    """
    decrypted = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    unpadded = unpad_pkcs7(decrypted)
    try:
        text = unpadded.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
907
908
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and authenticate `ciphertext` with AES-GCM, then decode it to str.

    Returns the decoded text, or None (after a one-time warning) if the
    decryption helper raises ValueError or the plaintext cannot be decoded.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        text = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
921
922
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling the Windows `CryptUnprotectData` function
    through ctypes.

    Returns the bytes copied out of the output blob, or None (after a one-time
    warning) if the call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    # imported locally; `ctypes.windll` is used below, which only exists on Windows
    import ctypes
    import ctypes.wintypes

    # ctypes mirror of the DATA_BLOB structure: a byte count and a pointer to the bytes
    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    # `buffer` must stay referenced while `blob_in` points at it
    # NOTE(review): create_string_buffer(bytes) allocates one extra NUL byte, so
    # sizeof(buffer) is len(ciphertext) + 1 - confirm the extra byte is intended
    buffer = ctypes.create_string_buffer(ciphertext)
    blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
    blob_out = DATA_BLOB()
    ret = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(blob_in),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(blob_out)  # pDataOut
    )
    if not ret:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before releasing the output buffer
    result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
    ctypes.windll.kernel32.LocalFree(blob_out.pbData)
    return result
955
956
def _config_home():
    """
    Return the base directory for user-specific configuration files.

    This is the value of the `XDG_CONFIG_HOME` environment variable, or
    `~/.config` (with `~` expanded) when the variable is unset or empty.
    """
    # An empty variable is treated like an unset one, as the XDG spec requires
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
959
960
def _open_database_copy(database_path, tmpdir):
    """
    Copy the database at `database_path` to `temporary.sqlite` inside `tmpdir`
    and return a cursor on a new connection to that copy.
    """
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    snapshot_path = shutil.copy(database_path, os.path.join(tmpdir, 'temporary.sqlite'))
    return sqlite3.connect(snapshot_path).cursor()
967
968
def _get_column_names(cursor, table_name):
    """
    Return the column names of `table_name` as a list of str, in the order
    reported by `PRAGMA table_info`.

    Names are decoded when the connection yields them as bytes (i.e. its
    `text_factory` is `bytes`) and passed through unchanged when it yields str.
    An unknown table gives an empty list.
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # row[1] is the column name
    return [
        row[1].decode() if isinstance(row[1], bytes) else row[1]
        for row in table_info
    ]
982ee69a
MB
972
973
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk `root` and return the path of the file named `filename` with the
    newest modification time, or None if no such file exists.

    Progress is reported through the progress bar created for `logger`.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched, candidates = 0, []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry != filename:
                    continue
                candidates.append(os.path.join(directory, entry))

    def modified_at(path):
        return os.lstat(path).st_mtime

    return max(candidates, key=modified_at, default=None)
985
986
def _merge_cookie_jars(jars):
    """
    Combine several cookie jars into a new `YoutubeDLCookieJar`.

    Cookies are added in iteration order. The merged jar takes the filename of
    the last jar in `jars` whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        if source_jar.filename is None:
            continue
        merged.filename = source_jar.filename
    return merged
995
996
def _is_path(value):
    """Whether `value` contains the platform's path separator (`os.path.sep`)"""
    separator = os.path.sep
    return separator in value
999
1000
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification and normalise its profile.

    A `profile` whose expanded form contains a path separator is replaced by
    that expanded form; otherwise it is returned untouched.

    Returns the tuple (browser_name, profile, keyring, container).
    Raises ValueError for an unsupported browser or keyring.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        expanded_profile = expand_path(profile)
        if _is_path(expanded_profile):
            profile = expanded_profile

    return browser_name, profile, keyring, container
8817a80d
SS
1009
1010
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """
    A `http.cookies.SimpleCookie` whose string parser skips malformed
    cookies instead of giving up on the whole header
    """
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    # Attribute names that belong to the preceding cookie
    _RESERVED = {
        "expires",
        "path",
        "comment",
        "domain",
        "max-age",
        "secure",
        "httponly",
        "version",
        "samesite",
    }

    # Reserved attributes that may appear without a value
    _FLAGS = {"secure", "httponly"}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
            [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
            \s*=\s*                    # Equal Sign
            (                          # Start of potential value
                (?P<val>               # Start of group 'val'
                    "(?:[^\\"]|\\.)*"  # Any doublequoted string
                    |                  # or
                    \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
                    |                  # or
                    [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
                )                      # End of group 'val'
                |                      # or
                (?P<bad>(?:\\;|[^;])*?)  # 'bad' group fallback for invalid values
            )                          # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """
        Load cookies from `data`.

        Non-string input is handed to the parent implementation. A string is
        scanned pair by pair; a pair that cannot be used is dropped, along
        with any attributes that follow it, up to the next valid cookie.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # The cookie that following attributes attach to, if any
        current = None
        for found in self._COOKIE_PATTERN.finditer(data):
            if found.group('bad'):
                current = None
                continue

            name = found.group('key')
            raw_value = found.group('val')

            has_dollar = name.startswith('$')
            if has_dollar:
                name = name[1:]
            folded_name = name.lower()

            if folded_name not in self._RESERVED:
                if has_dollar or raw_value is None:
                    # unknown `$attribute` or a valueless cookie
                    current = None
                    continue

                current = self.get(name, http.cookies.Morsel())
                decoded, coded = self.value_decode(raw_value)
                current.set(name, decoded, coded)
                self[name] = current
                continue

            # Reserved attribute: only meaningful right after a valid cookie
            if current is None:
                continue

            if raw_value is not None:
                current[name] = self.value_decode(raw_value)[0]
            elif folded_name in self._FLAGS:
                current[name] = True
            else:
                # a non-flag attribute without a value
                current = None
b87e01c1 1100
1101
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    A `MozillaCookieJar` that reads and writes Netscape-format cookie files
    from either a path or an already opened text stream.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Line prefix that marks a cookie as HttpOnly in the cookie file
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a cookie file entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """`filename` may be None, a path-like object or a text stream"""
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Render a truth value the way the cookie file format spells it"""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text stream for `file`.

        A path-like `file` is opened (and closed on exit) as UTF-8 text. Any
        other object is assumed to be an open stream and is yielded as-is,
        after being emptied when `write` is true. It is not closed on exit.
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() leaves the stream position untouched, so rewind
                # first; otherwise a stream that was read before would be
                # written at its old offset and padded with NULs
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        """Write one tab-separated line per cookie to the stream `f`"""
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.

        `filename` defaults to `self.filename`; ValueError is raised if both
        are None. Remaining arguments are passed on to `_really_save`.

        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Load cookies from a file.

        `filename` defaults to `self.filename`; ValueError is raised if both
        are None. Malformed entries are skipped with a warning, except that a
        line which looks like JSON raises `http.cookiejar.LoadError`.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            """Validate one line; return it without any HttpOnly prefix"""
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Collect the sanitized lines in memory and let the parent class parse them
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # The trailing space keeps the [0] lookup safe on an empty line
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')