]> jfr.im git - yt-dlp.git/blob - yt_dlp/cookies.py
[ie/youtube] Extract upload timestamp if available (#9856)
[yt-dlp.git] / yt_dlp / cookies.py
1 import base64
2 import collections
3 import contextlib
4 import datetime as dt
5 import glob
6 import http.cookiejar
7 import http.cookies
8 import io
9 import json
10 import os
11 import re
12 import shutil
13 import struct
14 import subprocess
15 import sys
16 import tempfile
17 import time
18 import urllib.request
19 from enum import Enum, auto
20 from hashlib import pbkdf2_hmac
21
22 from .aes import (
23 aes_cbc_decrypt_bytes,
24 aes_gcm_decrypt_and_verify_bytes,
25 unpad_pkcs7,
26 )
27 from .compat import functools # isort: split
28 from .compat import compat_os_name
29 from .dependencies import (
30 _SECRETSTORAGE_UNAVAILABLE_REASON,
31 secretstorage,
32 sqlite3,
33 )
34 from .minicurses import MultilinePrinter, QuietMultilinePrinter
35 from .utils import (
36 DownloadError,
37 Popen,
38 error_to_str,
39 expand_path,
40 is_path_like,
41 sanitize_url,
42 str_or_none,
43 try_call,
44 write_string,
45 )
46 from .utils._utils import _YDLLogger
47 from .utils.networking import normalize_url
48
# Browser names that are routed to the Chromium cookie extractor
CHROMIUM_BASED_BROWSERS = {
    'brave',
    'chrome',
    'chromium',
    'edge',
    'opera',
    'vivaldi',
    'whale',
}
# Every browser name this module knows about
SUPPORTED_BROWSERS = {'firefox', 'safari'} | CHROMIUM_BASED_BROWSERS
51
52
class YDLLogger(_YDLLogger):
    """Logger used while extracting cookies; adds an optional progress bar on top of ``_YDLLogger``."""

    def warning(self, message, only_once=False):  # compat
        """Forward to the parent ``warning``, passing ``only_once`` as its ``once`` keyword."""
        return super().warning(message, once=only_once)

    class ProgressBar(MultilinePrinter):
        """Progress printer that rate-limits how often a message is written."""

        # Minimum number of seconds between two prints, and the time of the last print
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            """Write ``message`` at line 0 unless the last write happened less than ``_DELAY`` seconds ago."""
            if time.time() - self._timer > self._DELAY:
                self.print_at_line(f'[Cookies] {message}', 0)
                self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        Returns None when no progress output should be shown.
        """
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
            return
        file = self._ydl._out_files.error
        try:
            if not file.isatty():
                return
        except Exception:
            # Best effort: a stream that cannot answer isatty() gets no progress bar.
            # KeyboardInterrupt/SystemExit are deliberately allowed to propagate.
            return
        return self.ProgressBar(file, preserve_output=False)
77
78
def _create_progress_bar(logger):
    """
    Return a printer usable as ``with ... as bar: bar.print(msg)``.

    The logger's own ``progress_bar()`` result is used when the logger has that method and
    it returns something truthy; otherwise a QuietMultilinePrinter whose ``print`` does nothing.
    """
    printer = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not printer:
        printer = QuietMultilinePrinter()
        printer.print = lambda _: None
    return printer
87
88
def load_cookies(cookie_file, browser_specification, ydl):
    """
    Build one cookie jar from a browser and/or a cookie file.

    browser_specification, when not None, is unpacked into _parse_browser_specification.
    cookie_file, when not None, is wrapped in a YoutubeDLCookieJar; a path-like value is
    expanded first and is only loaded if it is readable.
    The collected jars are combined by _merge_cookie_jars.
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
        jars.append(browser_jar)

    if cookie_file is not None:
        given_as_path = is_path_like(cookie_file)
        if given_as_path:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # Non-path inputs are always loaded; paths only when they can be read
        if not given_as_path or os.access(cookie_file, os.R_OK):
            file_jar.load()
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
107
108
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """
    Dispatch to the extractor matching browser_name.

    'firefox' and 'safari' have dedicated extractors; any name in CHROMIUM_BASED_BROWSERS
    goes through the Chrome extractor. Raises ValueError for any other name.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name not in CHROMIUM_BASED_BROWSERS:
        raise ValueError(f'unknown browser: {browser_name}')
    return _extract_chrome_cookies(browser_name, profile, keyring, logger)
118
119
def _extract_firefox_cookies(profile, container, logger):
    """
    Read cookies out of a Firefox ``cookies.sqlite`` database.

    profile:   None to search the directories from _firefox_browser_dirs(), a path
               (as decided by _is_path) to search directly, or a name that is joined
               onto each of those directories.
    container: None loads every cookie, 'none' loads only cookies without a
               ``userContextId`` origin attribute, anything else is looked up in
               ``containers.json`` next to the database.
    logger:    object providing info/debug/warning.

    Returns a YoutubeDLCookieJar (empty if sqlite3 is unavailable).
    Raises FileNotFoundError when no database or no readable containers.json is found,
    and ValueError when the requested container does not resolve to an integer ID.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_roots = list(_firefox_browser_dirs())
    elif _is_path(profile):
        search_roots = [profile]
    else:
        search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
    search_root = ', '.join(map(repr, search_roots))

    cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        with open(containers_path, encoding='utf8') as containers:
            identities = json.load(containers).get('identities', [])

        for context in identities:
            # Entries without a usable 'l10nID' simply have no label match
            label = try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']))
            accepted = [context.get('name')]
            if label:
                # Accept the bare name captured by the regex (e.g. "Personal") as well as
                # the full l10n identifier, which is what was matched previously
                accepted += [label.group(1), label.group()]
            if container in accepted:
                container_id = context.get('userContextId')
                break
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    columns = 'host, name, value, path, expiry, isSecure'
    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # Work on a copy so the browser's own database file is not opened directly
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                # The attribute may be the last one or be followed by '&'; matching both
                # forms keeps ID 1 from also selecting 10, 11, ...
                cursor.execute(
                    f'SELECT {columns} FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # Single quotes: double-quoted string literals are a legacy SQLite
                # quirk and are rejected by builds compiled with DQS disabled
                cursor.execute(
                    f"SELECT {columns} FROM moz_cookies WHERE NOT INSTR(originAttributes,'userContextId=')")
            else:
                cursor.execute(f'SELECT {columns} FROM moz_cookies')

            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
187
188
189 def _firefox_browser_dirs():
190 if sys.platform in ('cygwin', 'win32'):
191 yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
192
193 elif sys.platform == 'darwin':
194 yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
195
196 else:
197 yield from map(os.path.expanduser, (
198 '~/.mozilla/firefox',
199 '~/snap/firefox/common/.mozilla/firefox',
200 '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
201 ))
202
203
204 def _firefox_cookie_dbs(roots):
205 for root in map(os.path.abspath, roots):
206 for pattern in ('', '*/', 'Profiles/*/'):
207 yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
208
209
def _get_chromium_based_browser_settings(browser_name):
    """
    Look up the data directory, keyring name and profile support of a Chromium-based browser.

    Returns a dict with the keys 'browser_dir', 'keyring_name' and 'supports_profiles'.
    Raises KeyError when browser_name is not one of the names in the tables below.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    on_mac = sys.platform == 'darwin'

    # Each table maps browser name -> (base directory, path below it)
    if sys.platform in ('cygwin', 'win32'):
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        locations = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
            'whale': (local, R'Naver\Naver Whale\User Data'),
        }

    elif on_mac:
        support = os.path.expanduser('~/Library/Application Support')
        locations = {
            'brave': (support, 'BraveSoftware/Brave-Browser'),
            'chrome': (support, 'Google/Chrome'),
            'chromium': (support, 'Chromium'),
            'edge': (support, 'Microsoft Edge'),
            'opera': (support, 'com.operasoftware.Opera'),
            'vivaldi': (support, 'Vivaldi'),
            'whale': (support, 'Naver/Whale'),
        }

    else:
        config_dir = _config_home()
        locations = {
            'brave': (config_dir, 'BraveSoftware/Brave-Browser'),
            'chrome': (config_dir, 'google-chrome'),
            'chromium': (config_dir, 'chromium'),
            'edge': (config_dir, 'microsoft-edge'),
            'opera': (config_dir, 'opera'),
            'vivaldi': (config_dir, 'vivaldi'),
            'whale': (config_dir, 'naver-whale'),
        }

    browser_dir = os.path.join(*locations[browser_name])

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_names = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
        'whale': 'Whale',
    }

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_names[browser_name],
        'supports_profiles': browser_name not in browsers_without_profiles,
    }
268
269
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """
    Read and decrypt the cookies of a Chromium-based browser.

    profile may be None (search the browser directory), a path (as decided by _is_path),
    or a profile name below the browser directory. keyring is passed through to
    get_cookie_decryptor.

    Returns a YoutubeDLCookieJar (empty if sqlite3 is unavailable). Raises
    FileNotFoundError when no 'Cookies' database is found, and DownloadError when
    a PermissionError with errno 13 occurs on Windows.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    browser_dir = settings['browser_dir']
    supports_profiles = settings['supports_profiles']

    if profile is None:
        search_root = browser_dir
    elif _is_path(profile):
        search_root = profile
        # With profile support the given path is a profile inside the browser directory
        browser_dir = os.path.dirname(profile) if supports_profiles else profile
    elif supports_profiles:
        search_root = os.path.join(browser_dir, profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = browser_dir

    cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(browser_dir, settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Rows are fetched as bytes; text columns are decoded per cookie later
            cursor.connection.text_factory = bytes
            # The secure flag column is called either 'is_secure' or 'secure'
            if 'is_secure' in _get_column_names(cursor, 'cookies'):
                secure_column = 'is_secure'
            else:
                secure_column = 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')

            jar = YoutubeDLCookieJar()
            failed_cookies = unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total_cookie_count = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)

            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        except PermissionError as error:
            if compat_os_name == 'nt' and error.errno == 13:
                message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
                logger.error(message)
                raise DownloadError(message)  # force exit
            raise
        finally:
            if cursor is not None:
                cursor.connection.close()
340
341
342 def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
343 host_key = host_key.decode()
344 name = name.decode()
345 value = value.decode()
346 path = path.decode()
347 is_encrypted = not value and encrypted_value
348
349 if is_encrypted:
350 value = decryptor.decrypt(encrypted_value)
351 if value is None:
352 return is_encrypted, None
353
354 # In chrome, session cookies have expires_utc set to 0
355 # In our cookie-store, cookies that do not expire should have expires set to None
356 if not expires_utc:
357 expires_utc = None
358
359 return is_encrypted, http.cookiejar.Cookie(
360 version=0, name=name, value=value, port=None, port_specified=False,
361 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
362 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
363 comment=None, comment_url=None, rest={})
364
365
class ChromeCookieDecryptor:
    """
    Base class of the per-platform Chrome cookie decryptors.

    How Chrome protects cookie values, by platform:

    Linux:
    - cookies are either v10 or v11
        - v10: AES-CBC encrypted with a fixed key
            - also attempts empty password if decryption fails
        - v11: AES-CBC encrypted with an OS protected key (keyring)
            - also attempts empty password if decryption fails
            - v11 keys can be stored in various places depending on the active desktop environment [2]

    Mac:
    - cookies are either v10 or not v10
        - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
        - not v10: 'old data' stored as plaintext

    Windows:
    - cookies are either v10 or not v10
        - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
        - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Per-version counters; the subclasses in this file assign their own dict in __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one stored cookie value; every subclass has to provide this."""
        raise NotImplementedError('Must be implemented by sub classes')
398
399
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """
    Create the cookie decryptor for the current platform.

    Windows (win32/cygwin) uses browser_root, macOS uses browser_keyring_name, and every
    other platform gets the Linux decryptor, the only one that takes ``keyring``.
    """
    platform = sys.platform
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
406
407
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10/v11 cookie values as stored by Chromium on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring
        self._logger = logger
        self._cookie_counts = dict.fromkeys(('v10', 'v11', 'other'), 0)
        self._v10_key = self.derive_key(b'peanuts')
        self._empty_key = self.derive_key(b'')

    @functools.cached_property
    def _v11_key(self):
        """Key derived from the keyring password, or None if no password was obtained (computed once)."""
        password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        if password is None:
            return None
        return self.derive_key(password)

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key for ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """

        following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
        with an empty password. The failure detection is not the same as what chromium uses so the
        results won't be perfect

        References:
            - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
                - a bugfix to try an empty password as a fallback
        """
        # The first three bytes name the scheme, the rest is the ciphertext
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version not in (b'v10', b'v11'):
            self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
            self._cookie_counts['other'] += 1
            return None

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)

        self._cookie_counts['v11'] += 1
        v11_key = self._v11_key
        if v11_key is None:
            self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc_multi(ciphertext, (v11_key, self._empty_key), self._logger)
457
458
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for cookie values as stored by Chromium on macOS."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        self._cookie_counts = dict.fromkeys(('v10', 'other'), 0)
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        # Without a keyring password there is no key; decrypt() then reports v10 cookies as failed
        self._v10_key = self.derive_key(password) if password is not None else None

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key for ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a v10 value; any other value is returned unchanged. Returns None if no key is available."""
        if encrypted_value[:3] != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc_multi(encrypted_value[3:], (self._v10_key,), self._logger)
489
490
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for cookie values as stored by Chromium on Windows."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """
        Decrypt one stored cookie value.

        Values prefixed with ``v10`` are AES-GCM decrypted with the v10 key; anything else
        is handed to DPAPI. Returns None when no v10 key is available or when the DPAPI
        helper yields no plaintext.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the version prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): _decrypt_windows_dpapi is defined outside this view; this assumes it
            # signals failure with None. Propagating None matches the other failure paths here
            # instead of raising AttributeError on .decode()
            return None if plaintext is None else plaintext.decode()
526
527
528 def _extract_safari_cookies(profile, logger):
529 if sys.platform != 'darwin':
530 raise ValueError(f'unsupported platform: {sys.platform}')
531
532 if profile:
533 cookies_path = os.path.expanduser(profile)
534 if not os.path.isfile(cookies_path):
535 raise FileNotFoundError('custom safari cookies database not found')
536
537 else:
538 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
539
540 if not os.path.isfile(cookies_path):
541 logger.debug('Trying secondary cookie location')
542 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
543 if not os.path.isfile(cookies_path):
544 raise FileNotFoundError('could not find safari cookies database')
545
546 with open(cookies_path, 'rb') as f:
547 cookies_data = f.read()
548
549 jar = parse_safari_cookies(cookies_data, logger=logger)
550 logger.info(f'Extracted {len(jar)} cookies from safari')
551 return jar
552
553
class ParserError(Exception):
    """Raised by DataParser when the input is shorter or different than expected."""
556
557
class DataParser:
    """
    Sequential reader over a bytes buffer.

    ``cursor`` is the offset of the next unread byte. Every read advances it and raises
    ParserError instead of reading past the end of the data.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes; ParserError if negative or not enough data is left."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and raise ParserError unless they equal ``expected_value``."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless ``big_endian``)."""
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little-endian unless ``big_endian``)."""
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        """
        Read up to the next NUL byte and return the bytes before it decoded as text.

        The NUL is consumed as well. Raises ParserError if there is no NUL left in the
        data, and UnicodeDecodeError if the bytes cannot be decoded.
        """
        # One search instead of reading and collecting the string byte by byte
        terminator = self._data.find(b'\x00', self.cursor)
        if terminator < 0:
            # The byte-wise version ran to the end before failing; leave the cursor there too
            self.cursor = len(self._data)
            raise ParserError('reached end of input')
        raw = self._data[self.cursor:terminator]
        # Move past the terminator before decoding so a decode error leaves the cursor behind the string
        self.cursor = terminator + 1
        return raw.decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes, logging them at debug level; ParserError if negative."""
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        """Advance the cursor to ``offset``; ParserError if that lies behind the cursor."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Consume everything that is left."""
        self.skip_to(len(self._data), description)
607
608
609 def _mac_absolute_time_to_posix(timestamp):
610 return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=timestamp)).timestamp())
611
612
def _parse_safari_cookies_header(data, logger):
    """
    Parse the file header: the 'cook' signature, a big-endian page count, then one
    big-endian size per page.

    Returns (page_sizes, offset of the first byte after the header).
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
619
620
def _parse_safari_cookies_page(data, jar, logger):
    """
    Parse one page of cookie records and add its cookies to ``jar``.

    A page holds a signature, a cookie count, one record offset per cookie and
    then the records themselves.
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    cookie_count = parser.read_uint()
    offsets = [parser.read_uint() for _ in range(cookie_count)]
    if cookie_count == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{cookie_count: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record parser works on its own slice; consume the same length here
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
639
640
def _parse_safari_cookies_record(data, jar, logger):
    """
    Parse the cookie record at the start of ``data`` and add the cookie to ``jar``.

    Returns the record size read from the record header. A record whose strings
    cannot be decoded is skipped with a warning.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    is_secure = bool(parser.read_uint() & 0x0001)
    parser.skip(4, 'unknown record field 2')
    # Offsets of the four strings, in file order: domain, name, path, value
    string_offsets = [parser.read_uint() for _ in range(4)]
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        strings = []
        for offset in string_offsets:
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
681
682
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari binary cookie file into ``jar`` (a new
    YoutubeDLCookieJar when none is given) and return the jar.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
698
699
700 class _LinuxDesktopEnvironment(Enum):
701 """
702 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
703 DesktopEnvironment
704 """
705 OTHER = auto()
706 CINNAMON = auto()
707 DEEPIN = auto()
708 GNOME = auto()
709 KDE3 = auto()
710 KDE4 = auto()
711 KDE5 = auto()
712 KDE6 = auto()
713 PANTHEON = auto()
714 UKUI = auto()
715 UNITY = auto()
716 XFCE = auto()
717 LXQT = auto()
718
719
720 class _LinuxKeyring(Enum):
721 """
722 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
723 SelectedLinuxBackend
724 """
725 KWALLET = auto() # KDE4
726 KWALLET5 = auto()
727 KWALLET6 = auto()
728 GNOMEKEYRING = auto()
729 BASICTEXT = auto()
730
731
732 SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
733
734
def _get_linux_desktop_environment(env, logger):
    """
    Work out the desktop environment from environment variables.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment

    ``env`` is a mapping of environment variables. Three sources are consulted in order,
    each one only if the previous gave no answer:

    1. XDG_CURRENT_DESKTOP, which may hold several colon-separated entries
       (e.g. ``ubuntu:GNOME``); the first entry that is recognised wins
    2. DESKTOP_SESSION
    3. the legacy GNOME_DESKTOP_SESSION_ID / KDE_FULL_SESSION variables

    Returns a _LinuxDesktopEnvironment member, OTHER if nothing matched.
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        for part in map(str.strip, xdg_current_desktop.split(':')):
            if part == 'Unity':
                if desktop_session is not None and 'gnome-fallback' in desktop_session:
                    return _LinuxDesktopEnvironment.GNOME
                return _LinuxDesktopEnvironment.UNITY
            elif part == 'Deepin':
                return _LinuxDesktopEnvironment.DEEPIN
            elif part == 'GNOME':
                return _LinuxDesktopEnvironment.GNOME
            elif part == 'X-Cinnamon':
                return _LinuxDesktopEnvironment.CINNAMON
            elif part == 'KDE':
                kde_version = env.get('KDE_SESSION_VERSION', None)
                if kde_version == '5':
                    return _LinuxDesktopEnvironment.KDE5
                elif kde_version == '6':
                    return _LinuxDesktopEnvironment.KDE6
                elif kde_version == '4':
                    return _LinuxDesktopEnvironment.KDE4
                else:
                    logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
                    return _LinuxDesktopEnvironment.KDE4
            elif part == 'Pantheon':
                return _LinuxDesktopEnvironment.PANTHEON
            elif part == 'XFCE':
                return _LinuxDesktopEnvironment.XFCE
            elif part == 'UKUI':
                return _LinuxDesktopEnvironment.UKUI
            elif part == 'LXQt':
                return _LinuxDesktopEnvironment.LXQT
        # No entry was recognised: keep going with the older variables
        logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')

    if desktop_session is not None:
        if desktop_session == 'deepin':
            return _LinuxDesktopEnvironment.DEEPIN
        elif desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif desktop_session in ('kde4', 'kde-plasma'):
            return _LinuxDesktopEnvironment.KDE4
        elif desktop_session == 'kde':
            if 'KDE_SESSION_VERSION' in env:
                return _LinuxDesktopEnvironment.KDE4
            else:
                return _LinuxDesktopEnvironment.KDE3
        elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
            return _LinuxDesktopEnvironment.XFCE
        elif desktop_session == 'ukui':
            return _LinuxDesktopEnvironment.UKUI
        else:
            logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        if 'KDE_SESSION_VERSION' in env:
            return _LinuxDesktopEnvironment.KDE4
        else:
            return _LinuxDesktopEnvironment.KDE3
    return _LinuxDesktopEnvironment.OTHER
806
807
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend for the detected desktop environment.

    SelectBackend in [1]

    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
    """
    desktop_environment = _get_linux_desktop_environment(os.environ, logger)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')

    keyring_by_desktop = {
        _LinuxDesktopEnvironment.KDE4: _LinuxKeyring.KWALLET,
        _LinuxDesktopEnvironment.KDE5: _LinuxKeyring.KWALLET5,
        _LinuxDesktopEnvironment.KDE6: _LinuxKeyring.KWALLET6,
        _LinuxDesktopEnvironment.KDE3: _LinuxKeyring.BASICTEXT,
        _LinuxDesktopEnvironment.LXQT: _LinuxKeyring.BASICTEXT,
        _LinuxDesktopEnvironment.OTHER: _LinuxKeyring.BASICTEXT,
    }
    # Every desktop environment not listed above uses the GNOME keyring
    return keyring_by_desktop.get(desktop_environment, _LinuxKeyring.GNOMEKEYRING)
835
836
def _get_kwallet_network_wallet(keyring, logger):
    """ The name of the wallet used to store network passwords.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet

    Falls back to 'kdewallet' when the dbus call fails or anything raises.
    """
    fallback_wallet = 'kdewallet'
    # keyring -> (dbus service name, dbus object path)
    dbus_targets = (
        (_LinuxKeyring.KWALLET, 'org.kde.kwalletd', '/modules/kwalletd'),
        (_LinuxKeyring.KWALLET5, 'org.kde.kwalletd5', '/modules/kwalletd5'),
        (_LinuxKeyring.KWALLET6, 'org.kde.kwalletd6', '/modules/kwalletd6'),
    )
    try:
        for candidate, service_name, wallet_path in dbus_targets:
            if keyring == candidate:
                break
        else:
            raise ValueError(keyring)

        command = [
            'dbus-send', '--session', '--print-reply=literal',
            f'--dest={service_name}',
            wallet_path,
            'org.kde.KWallet.networkWallet',
        ]
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return fallback_wallet

        wallet_name = stdout.strip()
        logger.debug(f'NetworkWallet = "{wallet_name}"')
        return wallet_name
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return fallback_wallet
876
877
878 def _get_kwallet_password(browser_keyring_name, keyring, logger):
879 logger.debug(f'using kwallet-query to obtain password from {keyring.name}')
880
881 if shutil.which('kwallet-query') is None:
882 logger.error('kwallet-query command not found. KWallet and kwallet-query '
883 'must be installed to read from KWallet. kwallet-query should be'
884 'included in the kwallet package for your distribution')
885 return b''
886
887 network_wallet = _get_kwallet_network_wallet(keyring, logger)
888
889 try:
890 stdout, _, returncode = Popen.run([
891 'kwallet-query',
892 '--read-password', f'{browser_keyring_name} Safe Storage',
893 '--folder', f'{browser_keyring_name} Keys',
894 network_wallet
895 ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
896
897 if returncode:
898 logger.error(f'kwallet-query failed with return code {returncode}. '
899 'Please consult the kwallet-query man page for details')
900 return b''
901 else:
902 if stdout.lower().startswith(b'failed to read'):
903 logger.debug('failed to read password from kwallet. Using empty string instead')
904 # this sometimes occurs in KDE because chrome does not check hasEntry and instead
905 # just tries to read the value (which kwallet returns "") whereas kwallet-query
906 # checks hasEntry. To verify this:
907 # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
908 # while starting chrome.
909 # this was identified as a bug later and fixed in
910 # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
911 # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
912 return b''
913 else:
914 logger.debug('password found')
915 return stdout.rstrip(b'\n')
916 except Exception as e:
917 logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
918 return b''
919
920
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Look up the `<browser_keyring_name> Safe Storage` secret in the default secretstorage collection.

    @returns    the secret of the first item with a matching label, or b'' (after logging an
                error) when secretstorage is unavailable or no item matches
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        for entry in collection.get_all_items():
            if entry.get_label() == wanted_label:
                return entry.get_secret()
        logger.error('failed to read from keyring')
        return b''
937
938
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Obtain the browser's keyring password from the requested (or auto-detected) Linux keyring.

    @param browser_keyring_name    keyring name of the browser, passed on to the backend helpers
    @param keyring                 name of a `_LinuxKeyring` member, or a falsy value to auto-detect
    @param logger                  logger for debug output, passed on to the backend helpers
    @returns                       whatever the selected backend helper returns; None for BASICTEXT
    @raises AssertionError         if the selected keyring has no handler
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
        return _get_kwallet_password(browser_keyring_name, keyring, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    # raised explicitly (instead of `assert False`) so that the check survives `python -O`
    raise AssertionError(f'Unknown keyring {keyring}')
957
958
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the `<browser_keyring_name> Safe Storage` password using `security find-generic-password`.

    @returns    the password as bytes (trailing newlines removed), or None if the command
                exits with a non-zero status or an exception is raised
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        stdout, _, returncode = Popen.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if not returncode:
            return stdout.rstrip(b'\n')
        logger.warning('find-generic-password failed')
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
    return None
975
976
def _get_windows_v10_key(browser_root, logger):
    """
    Load the encrypted key from the newest `Local State` file below `browser_root` and
    hand it (without its `DPAPI` prefix) to `_decrypt_windows_dpapi`.

    @returns    the result of `_decrypt_windows_dpapi`, or None (after logging an error) if no
                Local State file is found, it contains no encrypted key, or the key lacks the prefix

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
    """
    local_state_path = _newest(_find_files(browser_root, 'Local State', logger))
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        # kOsCryptEncryptedKeyPrefName in [1]
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    wrapped_key = base64.b64decode(encoded_key)
    # kDPAPIKeyPrefix in [1]
    dpapi_prefix = b'DPAPI'
    if wrapped_key.startswith(dpapi_prefix):
        return _decrypt_windows_dpapi(wrapped_key[len(dpapi_prefix):], logger)

    logger.error('invalid key')
    return None
1002
1003
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive `key_length` bytes from `password` and `salt` using PBKDF2 with HMAC-SHA1"""
    return pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
1006
1007
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
    """
    Decrypt `ciphertext` with AES-CBC, trying each key in turn.

    @returns    the text decoded from the first key whose unpadded plaintext decodes as UTF-8,
                or None (after a one-time warning) if no key yields decodable text
    """
    for candidate_key in keys:
        decrypted = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, candidate_key, initialization_vector))
        # a decoding failure means this key did not fit; move on to the next one
        with contextlib.suppress(UnicodeDecodeError):
            return decrypted.decode()
    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
    return None
1017
1018
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and verify `ciphertext` with AES-GCM and decode the plaintext.

    @returns    the decoded text, or None (after a one-time warning) if decryption raises
                ValueError or the plaintext cannot be decoded
    """
    try:
        plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        failure = 'failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?'
    else:
        # decoding happens outside of the `except ValueError` above on purpose:
        # UnicodeDecodeError is a ValueError, but must be reported with its own message
        try:
            return plaintext.decode()
        except UnicodeDecodeError:
            failure = 'failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?'

    logger.warning(failure, only_once=True)
    return None
1031
1032
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling CryptUnprotectData from crypt32 through ctypes.

    @returns    the decrypted bytes, or None (after a one-time warning) if the call reports failure

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    encrypted_buffer = ctypes.create_string_buffer(ciphertext)
    input_blob = DATA_BLOB(ctypes.sizeof(encrypted_buffer), encrypted_buffer)
    output_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(input_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(output_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before releasing the output buffer
    decrypted = ctypes.string_at(output_blob.pbData, output_blob.cbData)
    ctypes.windll.kernel32.LocalFree(output_blob.pbData)
    return decrypted
1065
1066
1067 def _config_home():
1068 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
1069
1070
1071 def _open_database_copy(database_path, tmpdir):
1072 # cannot open sqlite databases if they are already in use (e.g. by the browser)
1073 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
1074 shutil.copy(database_path, database_copy_path)
1075 conn = sqlite3.connect(database_copy_path)
1076 return conn.cursor()
1077
1078
1079 def _get_column_names(cursor, table_name):
1080 table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
1081 return [row[1].decode() for row in table_info]
1082
1083
1084 def _newest(files):
1085 return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)
1086
1087
def _find_files(root, filename, logger):
    """
    Walk `root` and lazily yield the path of every file whose name equals `filename`.

    The running number of inspected files is reported through the progress bar
    created for `logger`.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, entries in os.walk(root):
            # continue the count where the previous directory left off
            for searched, entry in enumerate(entries, start=searched + 1):
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry != filename:
                    continue
                yield os.path.join(directory, entry)
1098
1099
def _merge_cookie_jars(jars):
    """
    Combine the cookies of all `jars` into one new YoutubeDLCookieJar.

    The result takes the filename of the last jar that has one.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        if source_jar.filename is None:
            continue
        merged.filename = source_jar.filename
    return merged
1108
1109
1110 def _is_path(value):
1111 return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
1112
1113
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification and return it as a 4-tuple.

    @returns            (browser_name, profile, keyring, container); `profile` is replaced by
                        its expanded form when that form looks like a path
    @raises ValueError  if the browser or the keyring is not supported
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        expanded_profile = expand_path(profile)
        if _is_path(expanded_profile):
            profile = expanded_profile

    return browser_name, profile, keyring, container
1122
1123
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie"""
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    # Attribute names that describe the preceding cookie instead of starting a new one
    _RESERVED = {
        'expires',
        'path',
        'comment',
        'domain',
        'max-age',
        'secure',
        'httponly',
        'version',
        'samesite',
    }

    # Reserved attributes that may appear without a value
    _FLAGS = {'secure', 'httponly'}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """
        Load cookies from `data`.

        Non-string input is handed to SimpleCookie.load unchanged. Strings are parsed
        with `_COOKIE_PATTERN`; an invalid entry is skipped (and ends the attribute
        list of the cookie before it) instead of aborting the whole parse.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # the cookie that subsequent reserved attributes are attached to
        current = None
        for found in self._COOKIE_PATTERN.finditer(data):
            if found.group('bad'):
                current = None
                continue

            key, value = found.group('key', 'val')

            is_attribute = key.startswith('$')
            if is_attribute:
                key = key[1:]
            lower_key = key.lower()

            if lower_key in self._RESERVED:
                if current is None:
                    # attribute without a cookie to attach it to
                    continue

                if value is not None:
                    value, _ = self.value_decode(value)
                elif lower_key in self._FLAGS:
                    value = True
                else:
                    # a valueless attribute that is not a flag invalidates the cookie context
                    current = None
                    continue

                current[key] = value

            elif is_attribute or value is None:
                current = None

            else:
                current = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)
                current.set(key, real_value, coded_value)
                self[key] = current
1213
1214
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    Cookie jar that reads and writes Netscape formatted cookie files.

    Paths as well as already opened file objects are accepted as `filename`.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab separated fields of a cookie file entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """
        @param filename    path-like object or file object to load from/save to (optional)

        Remaining arguments are passed on to MozillaCookieJar.
        """
        # the base class is initialised without a filename so that file objects can be stored too
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Return the cookie file spelling ('TRUE'/'FALSE') of the truthiness of `cndn`"""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text stream for `file`.

        Paths are opened (UTF-8) and closed again on exit. File objects are yielded
        as they are and are left open; when writing, they are emptied first.
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() does not move the stream position. Rewind first, otherwise
                # writing to a stream that is not at offset 0 (e.g. after it has been
                # read by `load`) pads the output with NUL characters
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard, ignore_expires):
        """Write one tab separated line per cookie to `f`, skipping discarded/expired cookies unless ignored"""
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        @param filename    target path or file object; defaults to `self.filename`
        @raises ValueError if neither `filename` nor `self.filename` is set
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Load cookies from a file.

        Malformed entries are skipped with a warning; a file that looks like JSON is rejected.

        @param filename                   source path or file object; defaults to `self.filename`
        @raises ValueError                if neither `filename` nor `self.filename` is set
        @raises http.cookiejar.LoadError  if a malformed entry looks like JSON
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly prefix and validate the entry; raises LoadError for malformed entries
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # The sanitized lines are collected in memory and parsed by the base class afterwards
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # a malformed line starting with `[`, `{` or `"` indicates a JSON export
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        cookie_req = urllib.request.Request(normalize_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')

    def get_cookies_for_url(self, url):
        """Generate a list of Cookie objects for a given url"""
        # Policy `_now` attribute must be set before calling `_cookies_for_request`
        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
        self._policy._now = self._now = int(time.time())
        return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url))))

    def clear(self, *args, **kwargs):
        """Like MozillaCookieJar.clear, but a KeyError raised by it is suppressed (None is returned then)"""
        with contextlib.suppress(KeyError):
            return super().clear(*args, **kwargs)