]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/cookies.py
[cleanup] Misc cleanup
[yt-dlp.git] / yt_dlp / cookies.py
... / ...
CommitLineData
1import contextlib
2import ctypes
3import json
4import os
5import shutil
6import struct
7import subprocess
8import sys
9import tempfile
10import time
11from datetime import datetime, timedelta, timezone
12from enum import Enum, auto
13from hashlib import pbkdf2_hmac
14
15from .aes import (
16 aes_cbc_decrypt_bytes,
17 aes_gcm_decrypt_and_verify_bytes,
18 unpad_pkcs7,
19)
20from .compat import compat_b64decode, compat_cookiejar_Cookie
21from .dependencies import (
22 _SECRETSTORAGE_UNAVAILABLE_REASON,
23 secretstorage,
24 sqlite3,
25)
26from .minicurses import MultilinePrinter, QuietMultilinePrinter
27from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
28
# Browsers routed to the Chromium cookie extractor by extract_cookies_from_browser().
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name accepted by extract_cookies_from_browser().
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS.union({'firefox', 'safari'})
31
32
class YDLLogger:
    """Logging facade that forwards messages to an optional YoutubeDL-like object.

    When no ``ydl`` object was supplied, every logging method is a silent no-op
    and progress_bar() returns None.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward ``message`` to ``ydl.write_debug``."""
        if self._ydl:
            self._ydl.write_debug(message)

    def info(self, message):
        """Print ``message`` with a ``[Cookies]`` prefix through ``ydl.to_screen``."""
        if self._ydl:
            self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward ``message`` and ``only_once`` to ``ydl.report_warning``."""
        if self._ydl:
            self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward ``message`` to ``ydl.report_error``."""
        if self._ydl:
            self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        """Printer that rewrites line 0, skipping updates that arrive within ``_DELAY`` seconds."""
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            if time.time() - self._timer > self._DELAY:
                self.print_at_line(f'[Cookies] {message}', 0)
                self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        Returns None when no progress output should be shown.
        """
        ydl = self._ydl
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not ydl or ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None
        stream = ydl._out_files['error']
        try:
            if not stream.isatty():
                return None
        except Exception:
            # Best effort: a stream that cannot answer isatty() gets no progress bar.
            # Only Exception is caught so that KeyboardInterrupt/SystemExit still propagate.
            return None
        return self.ProgressBar(stream, preserve_output=False)
73
74
75def _create_progress_bar(logger):
76 if hasattr(logger, 'progress_bar'):
77 printer = logger.progress_bar()
78 if printer:
79 return printer
80 printer = QuietMultilinePrinter()
81 printer.print = lambda _: None
82 return printer
83
84
def load_cookies(cookie_file, browser_specification, ydl):
    """Gather cookies from a browser and/or a cookie file and merge them into one jar.

    Either source is skipped when its argument is None. A cookie file given as a
    path is only loaded when it is readable; it is added to the merge either way.
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        is_filename = YoutubeDLCookieJar.is_path(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # os.access() is only consulted for real paths
        can_load = os.access(cookie_file, os.R_OK) if is_filename else True
        if can_load:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
102
103
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch to the cookie extractor matching ``browser_name``.

    ``keyring`` is only passed on to the Chromium-based extractor.
    Raises ValueError for a browser name that is not recognised.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
113
114
def _extract_firefox_cookies(profile, logger):
    """Load every row of ``moz_cookies`` from a Firefox ``cookies.sqlite`` into a new jar.

    ``profile`` may be None (search the default Firefox directory), a path, or a
    name joined onto the default Firefox directory. The database is located with
    _find_most_recently_used_file() and opened through _open_database_copy().

    Returns an empty jar (after a warning) when sqlite3 is unavailable.
    Raises FileNotFoundError when no cookie database is found.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is not None and _is_path(profile):
        search_root = profile
    else:
        search_root = _firefox_browser_dir()
        if profile is not None:
            search_root = os.path.join(search_root, profile)

    db_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if db_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{db_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(db_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    jar.set_cookie(compat_cookiejar_Cookie(
                        version=0, name=name, value=value,
                        port=None, port_specified=False,
                        domain=host, domain_specified=bool(host),
                        domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path),
                        secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            # The cursor stays None if opening the database failed
            if cursor is not None:
                cursor.connection.close()
156
157
158def _firefox_browser_dir():
159 if sys.platform in ('linux', 'linux2'):
160 return os.path.expanduser('~/.mozilla/firefox')
161 elif sys.platform == 'win32':
162 return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
163 elif sys.platform == 'darwin':
164 return os.path.expanduser('~/Library/Application Support/Firefox')
165 else:
166 raise ValueError(f'unsupported platform: {sys.platform}')
167
168
def _get_chromium_based_browser_settings(browser_name):
    """Return the per-platform settings for a Chromium-based browser.

    The result is a dict with the keys ``browser_dir`` (user data directory),
    ``keyring_name`` and ``supports_profiles``.

    Raises ValueError on unsupported platforms and KeyError for a browser name
    that has no entry in the tables below.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        config_root = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]
        browser_dir = os.path.join(config_root, relative_dir)

    elif platform == 'win32':
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        base, relative_dir = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
        }[browser_name]
        browser_dir = os.path.join(base, relative_dir)

    elif platform == 'darwin':
        app_support = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]
        browser_dir = os.path.join(app_support, relative_dir)

    else:
        raise ValueError(f'unsupported platform: {sys.platform}')

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }[browser_name]

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in {'opera'},
    }
226
227
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Load and decrypt the cookies of a Chromium-based browser into a new jar.

    ``profile`` may be None, a path, or a profile name. Cookies that cannot be
    decrypted are counted and left out of the jar.

    Returns an empty jar (after a warning) when sqlite3 is unavailable.
    Raises FileNotFoundError when no ``Cookies`` database is found.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)
    supports_profiles = config['supports_profiles']
    browser_root = config['browser_dir']

    if profile is None:
        search_root = browser_root
    elif _is_path(profile):
        search_root = profile
        # The decryptor is pointed at the directory derived from the given path
        browser_root = os.path.dirname(profile) if supports_profiles else profile
    elif supports_profiles:
        search_root = os.path.join(browser_root, profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = browser_root

    db_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if db_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{db_path}"')

    decryptor = get_cookie_decryptor(browser_root, config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(db_path, tmpdir)
            # Text columns are returned as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            columns = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in columns else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed = 0
            unencrypted = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed += 1
                        continue
                    if not is_encrypted:
                        unencrypted += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed} could not be decrypted)' if failed > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            # The cursor stays None if opening the database failed
            if cursor is not None:
                cursor.connection.close()
292
293
294def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
295 host_key = host_key.decode()
296 name = name.decode()
297 value = value.decode()
298 path = path.decode()
299 is_encrypted = not value and encrypted_value
300
301 if is_encrypted:
302 value = decryptor.decrypt(encrypted_value)
303 if value is None:
304 return is_encrypted, None
305
306 return is_encrypted, compat_cookiejar_Cookie(
307 version=0, name=name, value=value, port=None, port_specified=False,
308 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
309 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
310 comment=None, comment_url=None, rest={})
311
312
class ChromeCookieDecryptor:
    """
    Base class of the per-platform Chromium cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Tally of processed cookies per version; subclasses replace it in __init__.
    # This has to be a plain attribute: a getter-only property here would make the
    # ``self._cookie_counts = {...}`` assignment in subclasses raise AttributeError.
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one encrypted cookie value. Must be overridden by subclasses."""
        raise NotImplementedError('Must be implemented by sub classes')
345
346
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Build the cookie decryptor for the current platform.

    ``keyring`` is only used on Linux and ``browser_root`` only on Windows.
    Raises NotImplementedError on any other platform.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}')
356
357
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts ``v10`` (fixed key) and ``v11`` (keyring key) cookies on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._v10_key = self.derive_key(b'peanuts')
        keyring_password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        if keyring_password is None:
            self._v11_key = None
        else:
            self._v11_key = self.derive_key(keyring_password)
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value, or None when it cannot be decrypted."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        # Any other prefix is counted but not decrypted
        self._cookie_counts['other'] += 1
        return None
390
391
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts ``v10`` cookies on macOS; other values are returned unchanged."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        keychain_password = _get_mac_keyring_password(browser_keyring_name, logger)
        if keychain_password is None:
            self._v10_key = None
        else:
            self._v10_key = self.derive_key(keychain_password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the cookie value, or None when a v10 value cannot be decrypted."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
422
423
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts ``v10`` (AES-GCM) and DPAPI-protected cookies on Windows."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value; None when a v10 value cannot be decrypted."""
        version, payload = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
        #   kNonceLength
        nonce_length = 96 // 8
        # boringssl
        #   EVP_AEAD_AES_GCM_TAG_LEN
        authentication_tag_length = 16

        # payload layout: nonce | ciphertext | authentication tag
        nonce = payload[:nonce_length]
        ciphertext = payload[nonce_length:-authentication_tag_length]
        authentication_tag = payload[-authentication_tag_length:]

        return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
459
460
461def _extract_safari_cookies(profile, logger):
462 if profile is not None:
463 logger.error('safari does not support profiles')
464 if sys.platform != 'darwin':
465 raise ValueError(f'unsupported platform: {sys.platform}')
466
467 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
468
469 if not os.path.isfile(cookies_path):
470 logger.debug('Trying secondary cookie location')
471 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
472 if not os.path.isfile(cookies_path):
473 raise FileNotFoundError('could not find safari cookies database')
474
475 with open(cookies_path, 'rb') as f:
476 cookies_data = f.read()
477
478 jar = parse_safari_cookies(cookies_data, logger=logger)
479 logger.info(f'Extracted {len(jar)} cookies from safari')
480 return jar
481
482
class ParserError(Exception):
    """Raised by DataParser when the input cannot be read as requested."""
485
486
class DataParser:
    """Sequential reader over a bytes buffer; ``cursor`` is the offset of the next read."""

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor past them.

        Raises ParserError for a negative count or a read past the end of the data.
        """
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start = self.cursor
        stop = start + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes; raise ParserError unless they match."""
        actual = self.read_bytes(len(expected_value))
        if actual == expected_value:
            return
        raise ParserError(f'unexpected value: {actual} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless ``big_endian``)."""
        (number,) = struct.unpack('>I' if big_endian else '<I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little-endian unless ``big_endian``)."""
        (number,) = struct.unpack('>d' if big_endian else '<d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read bytes up to and including the next NUL; return the decoded text before it."""
        # iter() with a sentinel keeps reading single bytes until b'\x00' is returned
        return b''.join(iter(lambda: self.read_bytes(1), b'\x00')).decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance by ``num_bytes``, logging the skipped bytes; a negative count is an error."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped!r}')

    def skip_to(self, offset, description='unknown'):
        """Advance the cursor to the absolute position ``offset``."""
        distance = offset - self.cursor
        self.skip(distance, description)

    def skip_to_end(self, description='unknown'):
        """Advance the cursor to the end of the data."""
        self.skip_to(len(self._data), description)
536
537
538def _mac_absolute_time_to_posix(timestamp):
539 return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
540
541
def _parse_safari_cookies_header(data, logger):
    """Parse the file header: the ``cook`` signature, a page count and one size per page.

    Returns ``(page_sizes, offset)`` where ``offset`` is the parser position
    right after the header.
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
548
549
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of the cookie file and add each of its records to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record parser works on its own slice; advance this parser by the size it reports
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
568
569
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of ``data`` and add it to ``jar``.

    Returns the record size read from the record itself. A record whose strings
    are not valid UTF-8 is skipped with a warning; its size is still returned.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # The creation date is read and converted but not used afterwards
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        strings = []
        for offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value,
        port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain),
        domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path),
        secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
610
611
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari cookie file into ``jar`` (a new jar when None) and return it.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    target = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, target, logger)
    body.skip_to_end('footer')
    return target
627
628
629class _LinuxDesktopEnvironment(Enum):
630 """
631 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
632 DesktopEnvironment
633 """
634 OTHER = auto()
635 CINNAMON = auto()
636 GNOME = auto()
637 KDE = auto()
638 PANTHEON = auto()
639 UNITY = auto()
640 XFCE = auto()
641
642
643class _LinuxKeyring(Enum):
644 """
645 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
646 SelectedLinuxBackend
647 """
648 KWALLET = auto()
649 GNOMEKEYRING = auto()
650 BASICTEXT = auto()
651
652
653SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
654
655
def _get_linux_desktop_environment(env):
    """
    Work out the desktop environment from the environment mapping ``env``.

    Only the first ``:``-separated entry of XDG_CURRENT_DESKTOP is considered;
    DESKTOP_SESSION and two legacy variables serve as fallbacks.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    desktops = _LinuxDesktopEnvironment
    current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    session = env.get('DESKTOP_SESSION', None)
    if current_desktop is not None:
        current_desktop = current_desktop.split(':')[0].strip()

    if current_desktop == 'Unity':
        if session is not None and 'gnome-fallback' in session:
            return desktops.GNOME
        return desktops.UNITY

    by_xdg_name = {
        'GNOME': desktops.GNOME,
        'X-Cinnamon': desktops.CINNAMON,
        'KDE': desktops.KDE,
        'Pantheon': desktops.PANTHEON,
        'XFCE': desktops.XFCE,
    }
    if current_desktop in by_xdg_name:
        return by_xdg_name[current_desktop]

    if session is not None:
        if session in ('mate', 'gnome'):
            return desktops.GNOME
        if 'kde' in session:
            return desktops.KDE
        if 'xfce' in session:
            return desktops.XFCE
        return desktops.OTHER

    # The legacy variables are only consulted when DESKTOP_SESSION is unset
    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return desktops.GNOME
    if 'KDE_FULL_SESSION' in env:
        return desktops.KDE
    return desktops.OTHER
694
695
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend matching the desktop environment found in ``os.environ``.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    detected = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {detected.name}')
    if detected == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if detected == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
710
711
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Falls back to ``kdewallet`` when the dbus call fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet',
    ]
    try:
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.decode().strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
741
742
743def _get_kwallet_password(browser_keyring_name, logger):
744 logger.debug('using kwallet-query to obtain password from kwallet')
745
746 if shutil.which('kwallet-query') is None:
747 logger.error('kwallet-query command not found. KWallet and kwallet-query '
748 'must be installed to read from KWallet. kwallet-query should be'
749 'included in the kwallet package for your distribution')
750 return b''
751
752 network_wallet = _get_kwallet_network_wallet(logger)
753
754 try:
755 proc = Popen([
756 'kwallet-query',
757 '--read-password', f'{browser_keyring_name} Safe Storage',
758 '--folder', f'{browser_keyring_name} Keys',
759 network_wallet
760 ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
761
762 stdout, stderr = proc.communicate_or_kill()
763 if proc.returncode != 0:
764 logger.error(f'kwallet-query failed with return code {proc.returncode}. Please consult '
765 'the kwallet-query man page for details')
766 return b''
767 else:
768 if stdout.lower().startswith(b'failed to read'):
769 logger.debug('failed to read password from kwallet. Using empty string instead')
770 # this sometimes occurs in KDE because chrome does not check hasEntry and instead
771 # just tries to read the value (which kwallet returns "") whereas kwallet-query
772 # checks hasEntry. To verify this:
773 # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
774 # while starting chrome.
775 # this may be a bug as the intended behaviour is to generate a random password and store
776 # it, but that doesn't matter here.
777 return b''
778 else:
779 logger.debug('password found')
780 if stdout[-1:] == b'\n':
781 stdout = stdout[:-1]
782 return stdout
783 except Exception as e:
784 logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
785 return b''
786
787
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Return the secret of the ``<browser> Safe Storage`` item in the default collection.

    Returns ``b''`` when secretstorage is unavailable or no item carries that label.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    wanted_label = f'{browser_keyring_name} Safe Storage'
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        # Reached only when no item matched
        logger.error('failed to read from keyring')
        return b''
804
805
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Return the browser's keyring password, or None for the basic-text backend.

    ``keyring`` is the name of a _LinuxKeyring member; when it is falsy the
    backend is chosen by _choose_linux_keyring().
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {keyring}'
824
825
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Read the browser's ``Safe Storage`` password from the keychain with ``security``.

    Returns the password as bytes with one trailing newline removed, or None
    when the command exits with a non-zero status or cannot be run.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        proc = Popen(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            # Without this check a failed lookup would hand back b'' and a key
            # would be derived from an empty password
            logger.warning('find-generic-password failed')
            return None
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
843
844
def _get_windows_v10_key(browser_root, logger):
    """Load the v10 cookie key from the browser's ``Local State`` file.

    The base64 value at ``os_crypt.encrypted_key`` must start with ``DPAPI``
    once decoded; the remainder is passed to _decrypt_windows_dpapi().
    Returns None, after logging an error, when the file or key is missing or
    the prefix is wrong.
    """
    state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{state_path}"')

    with open(state_path, encoding='utf8') as state_file:
        local_state = json.load(state_file)

    try:
        base64_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    encrypted_key = compat_b64decode(base64_key)
    dpapi_prefix = b'DPAPI'
    if not encrypted_key.startswith(dpapi_prefix):
        logger.error('invalid key')
        return None
    key_blob = encrypted_key[len(dpapi_prefix):]
    return _decrypt_windows_dpapi(key_blob, logger)
864
865
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive `key_length` bytes from `password` and `salt` using PBKDF2 with HMAC-SHA1"""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
868
869
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt `ciphertext` with AES-CBC, strip the PKCS#7 padding and decode the result as text

    The default initialization vector is 16 space bytes. Returns the decoded string, or
    None (after logging a warning once) if the plaintext is not valid UTF-8.
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    raw_plaintext = unpad_pkcs7(padded)
    try:
        text = raw_plaintext.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
877
878
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and verify `ciphertext` with AES-GCM and decode the result as text

    Returns the decoded string, or None (after logging a warning once) if the
    decrypt-and-verify helper raises ValueError or the plaintext is not valid UTF-8.
    """
    # The two failure modes are handled in separate try blocks on purpose:
    # UnicodeDecodeError is a subclass of ValueError, so a merged handler would
    # report the wrong reason.
    try:
        raw_plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        text = raw_plaintext.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
891
892
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling CryptUnprotectData from crypt32 through ctypes

    Returns the decrypted bytes, or None (after logging a warning once) if the call
    reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    in_buffer = ctypes.create_string_buffer(ciphertext)
    blob_in = DATA_BLOB(ctypes.sizeof(in_buffer), in_buffer)
    blob_out = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(blob_in),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(blob_out),  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before releasing the output buffer
    decrypted = ctypes.string_at(blob_out.pbData, blob_out.cbData)
    ctypes.windll.kernel32.LocalFree(blob_out.pbData)
    return decrypted
923
924
def _config_home():
    """
    Return the user's configuration directory

    This is the value of the XDG_CONFIG_HOME environment variable, falling back to
    ~/.config (with ~ expanded) when the variable is unset or empty.
    """
    # The XDG Base Directory Specification treats an empty value the same as an unset
    # one. Returning '' here would turn every path joined onto it into a relative path.
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
927
928
def _open_database_copy(database_path, tmpdir):
    """
    Copy the sqlite database to `tmpdir` and return a cursor on the copy

    The copy is always named "temporary.sqlite".
    """
    # sqlite databases cannot be opened while they are in use (e.g. by the browser),
    # so work on a copy instead
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
935
936
def _get_column_names(cursor, table_name):
    """
    Return the column names of `table_name` as a list of str

    Each name is read from the second field of a `PRAGMA table_info` row and decoded
    with .decode(), so the cursor is expected to yield bytes for text values.
    """
    name_field = 1
    pragma_rows = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    return [column_info[name_field].decode() for column_info in pragma_rows]
940
941
def _find_most_recently_used_file(root, filename, logger):
    """
    Search `root` recursively for files named `filename` and return the newest match

    "Newest" means the largest st_mtime as reported by os.lstat(). Progress is reported
    through the progress bar created for `logger`. Returns None if nothing matches.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched, matches = 0, []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry == filename:
                    matches.append(os.path.join(directory, entry))
    if not matches:
        return None
    return max(matches, key=lambda match: os.lstat(match).st_mtime)
953
954
def _merge_cookie_jars(jars):
    """
    Combine all cookies from `jars` into a new YoutubeDLCookieJar

    Cookies are set in iteration order. The merged jar takes its filename from the
    last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        source_filename = source_jar.filename
        if source_filename is None:
            continue
        merged.filename = source_filename
    return merged
963
964
def _is_path(value):
    """Return True if `value` contains the platform's path separator"""
    separator = os.path.sep
    return separator in value
967
968
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """
    Validate a browser specification and return it as (browser_name, profile, keyring)

    Raises ValueError if `browser_name` is not in SUPPORTED_BROWSERS or if `keyring` is
    neither None nor in SUPPORTED_KEYRINGS. A `profile` that looks like a path has a
    leading ~ expanded; any other profile is returned unchanged.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    looks_like_path = profile is not None and _is_path(profile)
    resolved_profile = os.path.expanduser(profile) if looks_like_path else profile
    return browser_name, resolved_profile, keyring