]> jfr.im git - yt-dlp.git/blob - yt_dlp/cookies.py
80428c747bfaeb1f7ae6430b131e0ea4e5adb4ee
[yt-dlp.git] / yt_dlp / cookies.py
1 import base64
2 import collections
3 import contextlib
4 import http.cookiejar
5 import http.cookies
6 import io
7 import json
8 import os
9 import re
10 import shutil
11 import struct
12 import subprocess
13 import sys
14 import tempfile
15 import time
16 import urllib.request
17 from datetime import datetime, timedelta, timezone
18 from enum import Enum, auto
19 from hashlib import pbkdf2_hmac
20
21 from .aes import (
22 aes_cbc_decrypt_bytes,
23 aes_gcm_decrypt_and_verify_bytes,
24 unpad_pkcs7,
25 )
26 from .compat import functools
27 from .dependencies import (
28 _SECRETSTORAGE_UNAVAILABLE_REASON,
29 secretstorage,
30 sqlite3,
31 )
32 from .minicurses import MultilinePrinter, QuietMultilinePrinter
33 from .utils import (
34 Popen,
35 error_to_str,
36 escape_url,
37 expand_path,
38 is_path_like,
39 sanitize_url,
40 str_or_none,
41 try_call,
42 write_string,
43 )
44 from .utils._utils import _YDLLogger
45
# Browser names that are routed to the Chromium cookie extractor
CHROMIUM_BASED_BROWSERS = {
    'brave',
    'chrome',
    'chromium',
    'edge',
    'opera',
    'vivaldi',
}
# Every browser name that cookie extraction accepts
SUPPORTED_BROWSERS = {'firefox', 'safari', *CHROMIUM_BASED_BROWSERS}
48
49
class YDLLogger(_YDLLogger):
    """Logger used by the cookie code; extends `_YDLLogger` with an optional progress bar."""

    def warning(self, message, only_once=False):  # compat
        # this module passes `only_once`, the parent class expects `once`
        return super().warning(message, once=only_once)

    class ProgressBar(MultilinePrinter):
        """Printer that ignores messages arriving within `_DELAY` seconds of the last one shown."""
        _DELAY = 0.1
        _timer = 0

        def print(self, message):
            if time.time() - self._timer <= self._DELAY:
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        ydl = self._ydl
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not ydl or ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None
        stream = ydl._out_files.error
        try:
            is_terminal = stream.isatty()
        except BaseException:
            # any failure while probing the stream disables the progress bar
            return None
        if not is_terminal:
            return None
        return self.ProgressBar(stream, preserve_output=False)
74
75
76 def _create_progress_bar(logger):
77 if hasattr(logger, 'progress_bar'):
78 printer = logger.progress_bar()
79 if printer:
80 return printer
81 printer = QuietMultilinePrinter()
82 printer.print = lambda _: None
83 return printer
84
85
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file            path-like or other object accepted by YoutubeDLCookieJar, or None
    @param browser_specification  arguments for _parse_browser_specification, or None
    @param ydl                    object handed to YDLLogger for logging
    @returns                      the result of merging all jars that were collected
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
        jars.append(browser_jar)

    if cookie_file is not None:
        is_filename = is_path_like(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # a path that cannot be read is kept as an (empty) jar without loading it
        unreadable_path = is_filename and not os.access(cookie_file, os.R_OK)
        if not unreadable_path:
            file_jar.load()
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
104
105
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch to the extractor matching `browser_name`.

    `container` is only forwarded to the firefox extractor and `keyring` only to the
    Chromium one. Raises ValueError for a browser name that none of them handles.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
115
116
def _extract_firefox_cookies(profile, container, logger):
    """Load cookies from a Firefox cookie database into a YoutubeDLCookieJar.

    @param profile    None to search the default Firefox directory, a path to search directly,
                      or a name that is joined onto the default Firefox directory
    @param container  None to load every cookie, 'none' to load only cookies that do not
                      belong to a container, otherwise a container name looked up in containers.json
    @param logger     logger providing info/debug/warning
    @returns          the populated jar (an empty jar when sqlite3 is unavailable)
    @raises FileNotFoundError  when the cookie database or containers.json cannot be found/read
    @raises ValueError         when the requested container is not listed in containers.json
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        # JSON is read as UTF-8 explicitly instead of depending on the locale's default encoding
        with open(containers_path, encoding='utf-8') as containers:
            identities = json.load(containers).get('identities', [])

        l10n_pattern = r'userContext([^\.]+)\.label'
        container_id = next((
            context.get('userContextId') for context in identities
            if container in (
                context.get('name'),
                # entries may carry an l10nID of the form "userContext<Name>.label";
                # the full ID is still accepted for backward compatibility ...
                try_call(lambda: re.fullmatch(l10n_pattern, context['l10nID']).group()),
                # ... and so is the captured <Name> part, which the capture group was written for
                try_call(lambda: re.fullmatch(l10n_pattern, context['l10nID']).group(1)),
            )), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # the database is queried through a copy placed in the temporary directory
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # single quotes: double-quoted string literals are a non-standard SQLite
                # compatibility quirk that can be disabled at build time
                cursor.execute(
                    "SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,'userContextId=')")
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
183
184
185 def _firefox_browser_dir():
186 if sys.platform in ('cygwin', 'win32'):
187 return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
188 elif sys.platform == 'darwin':
189 return os.path.expanduser('~/Library/Application Support/Firefox')
190 return os.path.expanduser('~/.mozilla/firefox')
191
192
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a Chromium-based browser.

    @returns  dict with the keys 'browser_dir', 'keyring_name' and 'supports_profiles'
    @raises KeyError  for a browser name that is not in the lookup tables
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    # each table maps a browser to (base directory, path below that directory)
    if sys.platform in ('cygwin', 'win32'):
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        locations = {
            'brave': (appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (appdata_local, R'Google\Chrome\User Data'),
            'chromium': (appdata_local, R'Chromium\User Data'),
            'edge': (appdata_local, R'Microsoft\Edge\User Data'),
            'opera': (appdata_roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (appdata_local, R'Vivaldi\User Data'),
        }

    elif sys.platform == 'darwin':
        appdata = os.path.expanduser('~/Library/Application Support')
        locations = {
            'brave': (appdata, 'BraveSoftware/Brave-Browser'),
            'chrome': (appdata, 'Google/Chrome'),
            'chromium': (appdata, 'Chromium'),
            'edge': (appdata, 'Microsoft Edge'),
            'opera': (appdata, 'com.operasoftware.Opera'),
            'vivaldi': (appdata, 'Vivaldi'),
        }

    else:
        config = _config_home()
        locations = {
            'brave': (config, 'BraveSoftware/Brave-Browser'),
            'chrome': (config, 'google-chrome'),
            'chromium': (config, 'chromium'),
            'edge': (config, 'microsoft-edge'),
            'opera': (config, 'opera'),
            'vivaldi': (config, 'vivaldi'),
        }

    base_dir, relative_dir = locations[browser_name]
    browser_dir = os.path.join(base_dir, relative_dir)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = sys.platform == 'darwin'
    keyring_names = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }
    keyring_name = keyring_names[browser_name]

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles
    }
247
248
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Load (and decrypt where needed) the cookies of a Chromium-based browser.

    @param browser_name  key understood by _get_chromium_based_browser_settings
    @param profile       None, a path, or a profile name below the browser directory
    @param keyring       forwarded to get_cookie_decryptor
    @param logger        logger providing info/debug/warning/error
    @returns             the populated jar (an empty jar when sqlite3 is unavailable)
    @raises FileNotFoundError  when no 'Cookies' database is found
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    browser_root = settings['browser_dir']
    profiles_supported = settings['supports_profiles']

    if profile is None:
        search_root = browser_root
    elif _is_path(profile):
        search_root = profile
        # with profile support the given path is a profile, so its parent is the browser root
        browser_root = os.path.dirname(profile) if profiles_supported else profile
    elif profiles_supported:
        search_root = os.path.join(browser_root, profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = browser_root

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(browser_root, settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # rows are fetched as bytes; _process_chrome_cookie decodes the text columns
            cursor.connection.text_factory = bytes
            # the column is named either 'is_secure' or 'secure'
            if 'is_secure' in _get_column_names(cursor, 'cookies'):
                secure_column = 'is_secure'
            else:
                secure_column = 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total_cookie_count = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = {**decryptor._cookie_counts, 'unencrypted': unencrypted_cookies}
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
313
314
315 def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
316 host_key = host_key.decode()
317 name = name.decode()
318 value = value.decode()
319 path = path.decode()
320 is_encrypted = not value and encrypted_value
321
322 if is_encrypted:
323 value = decryptor.decrypt(encrypted_value)
324 if value is None:
325 return is_encrypted, None
326
327 return is_encrypted, http.cookiejar.Cookie(
328 version=0, name=name, value=value, port=None, port_specified=False,
329 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
330 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
331 comment=None, comment_url=None, rest={})
332
333
class ChromeCookieDecryptor:
    """
    Base class of the per-platform decryptors for Chromium cookie values.

    Overview of the schemes handled by the subclasses:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
                - also attempts empty password if decryption fails
            - v11: AES-CBC encrypted with an OS protected key (keyring)
                - also attempts empty password if decryption fails
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
        - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
            - KeyStorageLinux::CreateService
    """

    # number of cookies seen per version prefix; subclasses assign their own dict per instance
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value; subclasses return None when decryption is not possible."""
        raise NotImplementedError('Must be implemented by sub classes')
366
367
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the decryptor for the current platform.

    `browser_root` is only used on Windows; `browser_keyring_name` on macOS and Linux;
    `keyring` only on Linux.
    """
    platform = sys.platform
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
374
375
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the v10/v11 cookie values written by Chromium on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 cookies use a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        # key derived from the empty password, used as a fallback in decrypt()
        self._empty_key = self.derive_key(b'')
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring

    @functools.cached_property
    def _v11_key(self):
        # looked up on first use only, so the keyring is not queried unless a v11 cookie is seen
        password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        if password is None:
            return None
        return self.derive_key(password)

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """
        Decrypt a cookie value whose first three bytes are the version prefix.

        following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
        with an empty password. The failure detection is not the same as what chromium uses so the
        results won't be perfect

        References:
            - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
                - a bugfix to try an empty password as a fallback
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            v11_key = self._v11_key
            if v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc_multi(ciphertext, (v11_key, self._empty_key), self._logger)

        self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
        self._cookie_counts['other'] += 1
        return None
425
426
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the cookie values written by Chromium on macOS."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        # without a keyring password v10 cookies cannot be decrypted; decrypt() warns about it
        if password is None:
            self._v10_key = None
        else:
            self._v10_key = self.derive_key(password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a v10 value; any other value is returned unchanged."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)
457
458
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the cookie values written by Chromium on Windows."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value.

        Values prefixed with b'v10' are AES-GCM encrypted with the v10 key; everything else
        is passed to DPAPI. Returns the decrypted value, or None when it cannot be decrypted.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # layout of the payload: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): the DPAPI helper is defined outside this block; presumably it reports
            # failure by returning None like the other decrypt helpers - verify. Guarding here
            # makes a failed cookie count as "could not be decrypted" instead of raising
            # AttributeError on `.decode()`
            if plaintext is None:
                return None
            return plaintext.decode()
494
495
496 def _extract_safari_cookies(profile, logger):
497 if sys.platform != 'darwin':
498 raise ValueError(f'unsupported platform: {sys.platform}')
499
500 if profile:
501 cookies_path = os.path.expanduser(profile)
502 if not os.path.isfile(cookies_path):
503 raise FileNotFoundError('custom safari cookies database not found')
504
505 else:
506 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
507
508 if not os.path.isfile(cookies_path):
509 logger.debug('Trying secondary cookie location')
510 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
511 if not os.path.isfile(cookies_path):
512 raise FileNotFoundError('could not find safari cookies database')
513
514 with open(cookies_path, 'rb') as f:
515 cookies_data = f.read()
516
517 jar = parse_safari_cookies(cookies_data, logger=logger)
518 logger.info(f'Extracted {len(jar)} cookies from safari')
519 return jar
520
521
class ParserError(Exception):
    """Raised by DataParser for invalid reads/skips, truncated input and unexpected bytes."""
524
525
class DataParser:
    """Sequential reader over a bytes buffer; `cursor` is the offset of the next unread byte."""

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next `num_bytes` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start = self.cursor
        stop = start + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume len(expected_value) bytes and raise ParserError unless they equal it."""
        actual = self.read_bytes(len(expected_value))
        if actual != expected_value:
            raise ParserError(f'unexpected value: {actual} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little endian unless `big_endian`)."""
        (number,) = struct.unpack('>I' if big_endian else '<I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little endian unless `big_endian`)."""
        (number,) = struct.unpack('>d' if big_endian else '<d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read up to and including the next NUL byte and return the decoded text before it."""
        # iter() with a sentinel keeps reading single bytes until the terminator is returned
        single_bytes = iter(lambda: self.read_bytes(1), b'\x00')
        return b''.join(single_bytes).decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance by `num_bytes`, logging the skipped bytes; a negative count is an error."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped!r}')

    def skip_to(self, offset, description='unknown'):
        """Advance to the absolute position `offset`."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Advance to the end of the buffer."""
        self.skip_to(len(self._data), description)
575
576
577 def _mac_absolute_time_to_posix(timestamp):
578 return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
579
580
def _parse_safari_cookies_header(data, logger):
    """Parse the file header and return (list of page sizes, offset at which the header ends)."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
587
588
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of cookie records and add every parsed cookie to `jar`."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, record_offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(record_offset, 'space between records')
            # the record is parsed from its own slice; this parser then steps over it
            record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
            parser.read_bytes(record_length)
    parser.skip_to_end('space in between pages')
607
608
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of `data`, add it to `jar` and return the record size.

    A record whose strings are not valid UTF-8 is skipped with a warning; its size is
    still returned so that the caller can continue with the next record.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        strings = []
        # the strings are read in this order: domain, name, path, value
        for string_offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(string_offset)
            strings.append(parser.read_cstring())
        domain, name, path, value = strings
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
649
650
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari binarycookies file into `jar` (a new jar when None) and return it.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure are the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for page_size in page_sizes:
        page = body.read_bytes(page_size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
666
667
668 class _LinuxDesktopEnvironment(Enum):
669 """
670 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
671 DesktopEnvironment
672 """
673 OTHER = auto()
674 CINNAMON = auto()
675 DEEPIN = auto()
676 GNOME = auto()
677 KDE3 = auto()
678 KDE4 = auto()
679 KDE5 = auto()
680 KDE6 = auto()
681 PANTHEON = auto()
682 UKUI = auto()
683 UNITY = auto()
684 XFCE = auto()
685 LXQT = auto()
686
687
688 class _LinuxKeyring(Enum):
689 """
690 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
691 SelectedLinuxBackend
692 """
693 KWALLET = auto() # KDE4
694 KWALLET5 = auto()
695 KWALLET6 = auto()
696 GNOMEKEYRING = auto()
697 BASICTEXT = auto()
698
699
700 SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
701
702
703 def _get_linux_desktop_environment(env, logger):
704 """
705 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
706 GetDesktopEnvironment
707 """
708 xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
709 desktop_session = env.get('DESKTOP_SESSION', None)
710 if xdg_current_desktop is not None:
711 xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
712
713 if xdg_current_desktop == 'Unity':
714 if desktop_session is not None and 'gnome-fallback' in desktop_session:
715 return _LinuxDesktopEnvironment.GNOME
716 else:
717 return _LinuxDesktopEnvironment.UNITY
718 elif xdg_current_desktop == 'Deepin':
719 return _LinuxDesktopEnvironment.DEEPIN
720 elif xdg_current_desktop == 'GNOME':
721 return _LinuxDesktopEnvironment.GNOME
722 elif xdg_current_desktop == 'X-Cinnamon':
723 return _LinuxDesktopEnvironment.CINNAMON
724 elif xdg_current_desktop == 'KDE':
725 kde_version = env.get('KDE_SESSION_VERSION', None)
726 if kde_version == '5':
727 return _LinuxDesktopEnvironment.KDE5
728 elif kde_version == '6':
729 return _LinuxDesktopEnvironment.KDE6
730 elif kde_version == '4':
731 return _LinuxDesktopEnvironment.KDE4
732 else:
733 logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
734 return _LinuxDesktopEnvironment.KDE4
735 elif xdg_current_desktop == 'Pantheon':
736 return _LinuxDesktopEnvironment.PANTHEON
737 elif xdg_current_desktop == 'XFCE':
738 return _LinuxDesktopEnvironment.XFCE
739 elif xdg_current_desktop == 'UKUI':
740 return _LinuxDesktopEnvironment.UKUI
741 elif xdg_current_desktop == 'LXQt':
742 return _LinuxDesktopEnvironment.LXQT
743 else:
744 logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
745
746 elif desktop_session is not None:
747 if desktop_session == 'deepin':
748 return _LinuxDesktopEnvironment.DEEPIN
749 elif desktop_session in ('mate', 'gnome'):
750 return _LinuxDesktopEnvironment.GNOME
751 elif desktop_session in ('kde4', 'kde-plasma'):
752 return _LinuxDesktopEnvironment.KDE4
753 elif desktop_session == 'kde':
754 if 'KDE_SESSION_VERSION' in env:
755 return _LinuxDesktopEnvironment.KDE4
756 else:
757 return _LinuxDesktopEnvironment.KDE3
758 elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
759 return _LinuxDesktopEnvironment.XFCE
760 elif desktop_session == 'ukui':
761 return _LinuxDesktopEnvironment.UKUI
762 else:
763 logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
764
765 else:
766 if 'GNOME_DESKTOP_SESSION_ID' in env:
767 return _LinuxDesktopEnvironment.GNOME
768 elif 'KDE_FULL_SESSION' in env:
769 if 'KDE_SESSION_VERSION' in env:
770 return _LinuxDesktopEnvironment.KDE4
771 else:
772 return _LinuxDesktopEnvironment.KDE3
773 return _LinuxDesktopEnvironment.OTHER
774
775
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the detected desktop environment (SelectBackend in [1]).

    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
    """
    desktop_environment = _get_linux_desktop_environment(os.environ, logger)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    keyring_by_environment = {
        _LinuxDesktopEnvironment.KDE4: _LinuxKeyring.KWALLET,
        _LinuxDesktopEnvironment.KDE5: _LinuxKeyring.KWALLET5,
        _LinuxDesktopEnvironment.KDE6: _LinuxKeyring.KWALLET6,
        _LinuxDesktopEnvironment.KDE3: _LinuxKeyring.BASICTEXT,
        _LinuxDesktopEnvironment.LXQT: _LinuxKeyring.BASICTEXT,
        _LinuxDesktopEnvironment.OTHER: _LinuxKeyring.BASICTEXT,
    }
    # every environment that is not listed above uses the GNOME keyring
    return keyring_by_environment.get(desktop_environment, _LinuxKeyring.GNOMEKEYRING)
803
804
def _get_kwallet_network_wallet(keyring, logger):
    """ The name of the wallet used to store network passwords.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet

    Returns 'kdewallet' when the name cannot be obtained.
    """
    default_wallet = 'kdewallet'
    try:
        # (keyring, dbus service name, dbus object path)
        endpoints = (
            (_LinuxKeyring.KWALLET, 'org.kde.kwalletd', '/modules/kwalletd'),
            (_LinuxKeyring.KWALLET5, 'org.kde.kwalletd5', '/modules/kwalletd5'),
            (_LinuxKeyring.KWALLET6, 'org.kde.kwalletd6', '/modules/kwalletd6'),
        )
        for candidate, service_name, wallet_path in endpoints:
            if keyring == candidate:
                break
        else:
            raise ValueError(keyring)

        command = [
            'dbus-send', '--session', '--print-reply=literal',
            f'--dest={service_name}',
            wallet_path,
            'org.kde.KWallet.networkWallet'
        ]
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
844
845
def _get_kwallet_password(browser_keyring_name, keyring, logger):
    """
    Read the "<browser_keyring_name> Safe Storage" password from KWallet via `kwallet-query`.

    The entry is looked up in the folder "<browser_keyring_name> Keys" of the wallet
    reported by _get_kwallet_network_wallet.

    Returns the password as bytes (trailing newlines stripped), or b'' when
    kwallet-query is not installed, exits with a non-zero code, reports that it
    failed to read the entry, or raises.
    """
    logger.debug(f'using kwallet-query to obtain password from {keyring.name}')

    if shutil.which('kwallet-query') is None:
        # NB: every fragment but the last must end with a space, since adjacent literals are joined as-is
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(keyring, logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this was identified as a bug later and fixed in
            # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
            # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
            return b''

        logger.debug('password found')
        return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
887
888
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Fetch the secret labelled "<browser_keyring_name> Safe Storage" from the default
    secretstorage collection.

    Returns the secret, or b'' (after logging an error) when secretstorage is
    unavailable or no item carries the expected label.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for entry in collection.get_all_items():
            if entry.get_label() == f'{browser_keyring_name} Safe Storage':
                return entry.get_secret()
        # Only reached when no entry matched the label
        logger.error('failed to read from keyring')
        return b''
905
906
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Obtain the browser's keyring password from the selected Linux keyring backend.

    `keyring` is the name of a _LinuxKeyring member; when it is falsy the backend
    is chosen by _choose_linux_keyring.

    Returns the password from KWallet or the Gnome keyring, or None for BASICTEXT
    (where no keyring password is needed).
    Raises AssertionError for a keyring that none of the branches handle.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
        return _get_kwallet_password(browser_keyring_name, keyring, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    # Raised explicitly instead of `assert False`: assert statements are removed under `python -O`,
    # which would turn an unhandled keyring into a silent `None` (the BASICTEXT result)
    raise AssertionError(f'Unknown keyring {keyring}')
925
926
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the "<browser_keyring_name> Safe Storage" password with `security find-generic-password`.

    Returns the password as bytes (trailing newlines stripped), or None when the
    command exits with a non-zero code or raises.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        output, _, exit_code = Popen.run(
            [
                'security', 'find-generic-password',
                '-w',  # write password to stdout
                '-a', browser_keyring_name,  # match 'account'
                '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
            ],
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None

    if exit_code:
        logger.warning('find-generic-password failed')
        return None
    return output.rstrip(b'\n')
943
944
def _get_windows_v10_key(browser_root, logger):
    """
    Extract and decrypt the cookie encryption key stored in the browser's "Local State" file.

    The most recently modified "Local State" below `browser_root` is parsed as JSON,
    its base64 `os_crypt.encrypted_key` is decoded, the "DPAPI" prefix is removed and
    the remainder is passed to _decrypt_windows_dpapi.

    Returns the result of _decrypt_windows_dpapi, or None (after logging an error)
    if the file, the key or the prefix is missing.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        # kOsCryptEncryptedKeyPrefName in [1]
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    wrapped_key = base64.b64decode(encoded_key)
    # kDPAPIKeyPrefix in [1]
    dpapi_prefix = b'DPAPI'
    if wrapped_key.startswith(dpapi_prefix):
        return _decrypt_windows_dpapi(wrapped_key[len(dpapi_prefix):], logger)

    logger.error('invalid key')
    return None
970
971
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a `key_length`-byte key from `password` and `salt` using PBKDF2 with HMAC-SHA1"""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
974
975
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
    """
    Decrypt `ciphertext` with AES-CBC, trying each of `keys` in order.

    Returns the first plaintext that decodes as UTF-8. If none does, a warning is
    logged (once) and None is returned.
    """
    for candidate_key in keys:
        decrypted = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, candidate_key, initialization_vector))
        # A failed decode is taken to mean the wrong key; move on to the next candidate
        with contextlib.suppress(UnicodeDecodeError):
            return decrypted.decode()

    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
    return None
985
986
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and authenticate `ciphertext` with AES-GCM and decode the result as UTF-8.

    Returns the decoded text, or None (after logging a warning once) if the
    decryption raises ValueError or the plaintext is not valid UTF-8.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        failure = 'failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?'
    else:
        try:
            return decrypted.decode()
        except UnicodeDecodeError:
            failure = 'failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?'

    logger.warning(failure, only_once=True)
    return None
999
1000
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling CryptUnprotectData from crypt32 through ctypes.

    Returns the decrypted bytes, or None (after logging a warning once) when the
    call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', ctypes.wintypes.DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    encrypted_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(encrypted_buffer), encrypted_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # Copy the output into a Python bytes object before handing the buffer back via LocalFree
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
1033
1034
def _config_home():
    """
    Return the user's configuration directory.

    This is $XDG_CONFIG_HOME when it is set to a non-empty value, else `~/.config`.
    The XDG Base Directory Specification requires the default to be used both when
    the variable is unset and when it is empty.
    """
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
1037
1038
def _open_database_copy(database_path, tmpdir):
    """
    Copy the database at `database_path` to `temporary.sqlite` inside `tmpdir`
    and return a cursor on a new connection to that copy
    """
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
1045
1046
def _get_column_names(cursor, table_name):
    """
    Return the column names of `table_name` as a list of str.

    The names are read from `PRAGMA table_info` and `.decode()`d, so the cursor
    must yield them as bytes.
    """
    column_rows = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # The name is the second field of every table_info row
    return [raw_name.decode() for _, raw_name, *_ in column_rows]
1050
1051
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk `root` and return the path of the file named `filename` with the newest
    modification time, or None if there is no such file.

    The number of files searched so far is reported through the progress bar.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched, candidates = 0, []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _subdirs, names in os.walk(root):
            for name in names:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if name == filename:
                    candidates.append(os.path.join(directory, name))

    if not candidates:
        return None
    return max(candidates, key=lambda path: os.lstat(path).st_mtime)
1063
1064
def _merge_cookie_jars(jars):
    """
    Combine `jars` into a new YoutubeDLCookieJar.

    Cookies are set in iteration order. The merged jar takes the filename of the
    last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        if source_jar.filename is None:
            continue
        merged.filename = source_jar.filename
    return merged
1073
1074
def _is_path(value):
    """Whether `value` contains the platform's path separator"""
    separator = os.path.sep
    return separator in value
1077
1078
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification and return it as the tuple
    (browser_name, profile, keyring, container).

    A profile whose expanded form contains a path separator is replaced by that
    expanded form; any other profile is passed through untouched.
    Raises ValueError for an unsupported browser or keyring.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        expanded_profile = expand_path(profile)
        if _is_path(expanded_profile):
            profile = expanded_profile

    return browser_name, profile, keyring, container
1087
1088
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie"""
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    # Attribute names that belong to the preceding cookie rather than starting a new one
    _RESERVED = {
        "expires",
        "path",
        "comment",
        "domain",
        "max-age",
        "secure",
        "httponly",
        "version",
        "samesite",
    }

    # Reserved attributes that may appear without a value
    _FLAGS = {"secure", "httponly"}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """
        Load cookies from `data`.

        Non-string input is handed to SimpleCookie.load unchanged. Strings are
        scanned entry by entry; an entry that cannot be used is skipped (together
        with the attributes that follow it) instead of aborting the whole parse.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # The cookie that following attributes get attached to; None = ignore them
        current = None
        for found in self._COOKIE_PATTERN.finditer(data):
            if found.group('bad'):
                current = None
                continue

            key, value = found.group('key', 'val')

            is_attribute = key.startswith('$')
            if is_attribute:
                key = key[1:]

            lower_key = key.lower()
            if lower_key in self._RESERVED:
                if current is None:
                    continue

                if value is not None:
                    value, _ = self.value_decode(value)
                elif lower_key in self._FLAGS:
                    value = True
                else:
                    # A non-flag attribute without a value invalidates the current cookie context
                    current = None
                    continue

                current[key] = value

            elif is_attribute or value is None:
                current = None

            else:
                current = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)
                current.set(key, real_value, coded_value)
                self[key] = current
1178
1179
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    Cookie jar stored in the Netscape cookies.txt format.

    Both paths and already opened text file objects are accepted wherever a
    file is expected.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab separated fields in one cookie entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """`filename` may be None, a path-like object or a file object"""
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Spell a truth value the way the cookie file format does"""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text file object for `file`.

        Paths are opened (UTF-8) and closed again on exit. File objects are yielded
        as-is and left open; with `write=True` they are emptied first.
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() does not move the stream position. Without rewinding, a file object
                # that was read before (e.g. by load()) would be written at its old offset,
                # leaving a run of NUL characters in front of the new content
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard, ignore_expires):
        """Write one tab separated line per cookie to `f`, honouring the ignore_* switches"""
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        `filename` defaults to `self.filename`; ValueError is raised if both are None.
        Session cookies are written with `expires` 0 but keep `expires = None` in memory.
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        session_cookies = [cookie for cookie in self if cookie.expires is None]
        for cookie in session_cookies:
            cookie.expires = 0

        try:
            with self.open(filename, write=True) as f:
                f.write(self._HEADER)
                self._really_save(f, ignore_discard, ignore_expires)
        finally:
            # Undo the temporary change: `expires == 0` makes Cookie.is_expired() true, so
            # leaving it in place would get the session cookies dropped from the jar the
            # next time expired cookies are cleared
            for cookie in session_cookies:
                cookie.expires = None

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Load cookies from a file.

        `filename` defaults to `self.filename`; ValueError is raised if both are None.
        Malformed entries are skipped with a warning, except that a line which looks
        like JSON raises http.cookiejar.LoadError.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly marker so that the entry is parsed like any other cookie
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Validated lines are collected in memory and then parsed by MozillaCookieJar
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # The trailing space keeps the index valid for blank lines
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See  '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')

    def get_cookies_for_url(self, url):
        """Generate a list of Cookie objects for a given url"""
        # Policy `_now` attribute must be set before calling `_cookies_for_request`
        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
        self._policy._now = self._now = int(time.time())
        return self._cookies_for_request(urllib.request.Request(escape_url(sanitize_url(url))))

    def clear(self, *args, **kwargs):
        """Like CookieJar.clear, but a KeyError (nothing matching to clear) is ignored"""
        with contextlib.suppress(KeyError):
            return super().clear(*args, **kwargs)