]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/cookies.py
Add option `--download-sections` to download video partially
[yt-dlp.git] / yt_dlp / cookies.py
... / ...
CommitLineData
1import contextlib
2import ctypes
3import json
4import os
5import shutil
6import struct
7import subprocess
8import sys
9import tempfile
10import time
11from datetime import datetime, timedelta, timezone
12from enum import Enum, auto
13from hashlib import pbkdf2_hmac
14
15from .aes import (
16 aes_cbc_decrypt_bytes,
17 aes_gcm_decrypt_and_verify_bytes,
18 unpad_pkcs7,
19)
20from .compat import compat_b64decode, compat_cookiejar_Cookie
21from .dependencies import (
22 _SECRETSTORAGE_UNAVAILABLE_REASON,
23 secretstorage,
24 sqlite3,
25)
26from .minicurses import MultilinePrinter, QuietMultilinePrinter
27from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
28
# Browsers routed to the Chromium cookie extraction path
CHROMIUM_BASED_BROWSERS = {
    'brave',
    'chrome',
    'chromium',
    'edge',
    'opera',
    'vivaldi',
}
# Every browser name accepted by extract_cookies_from_browser()
SUPPORTED_BROWSERS = {'firefox', 'safari'} | CHROMIUM_BASED_BROWSERS
31
32
class YDLLogger:
    """Logger facade that forwards messages to the wrapped ``ydl`` object.

    When no ``ydl`` object was supplied, every logging method silently does nothing.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward a debug message to ``ydl.write_debug``."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Print an informational message, tagged with ``[Cookies]``."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward a warning to ``ydl.report_warning``."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward an error to ``ydl.report_error``."""
        if not self._ydl:
            return
        self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        """Printer that rate-limits updates to one per ``_DELAY`` seconds."""

        _DELAY, _timer = 0.1, 0

        def print(self, message):
            elapsed = time.time() - self._timer
            if elapsed <= self._DELAY:
                # Too soon since the last update; drop this message
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        ydl = self._ydl
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not ydl or ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None
        stream = ydl._out_files.error
        try:
            is_terminal = stream.isatty()
        except BaseException:
            # Any failure to query the stream is treated as "not a terminal"
            return None
        if not is_terminal:
            return None
        return self.ProgressBar(stream, preserve_output=False)
73
74
def _create_progress_bar(logger):
    """Return a progress printer for ``logger``.

    Uses ``logger.progress_bar()`` when the logger has that method and it
    returns something truthy; otherwise falls back to a quiet printer whose
    ``print`` discards its argument.
    """
    bar = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if bar:
        return bar

    silent = QuietMultilinePrinter()
    silent.print = lambda _: None
    return silent
83
84
def load_cookies(cookie_file, browser_specification, ydl):
    """Collect cookies from a browser and/or a cookie file into one merged jar.

    @param cookie_file            cookie file (path or other object), or None to skip it
    @param browser_specification  arguments for _parse_browser_specification, or None to skip it
    @param ydl                    object handed to YDLLogger for reporting
    @returns                      the result of merging every jar that was collected
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        refers_to_path = YoutubeDLCookieJar.is_path(cookie_file)
        if refers_to_path:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # Loading is skipped only when the target is a path that is not readable
        should_load = (not refers_to_path) or os.access(cookie_file, os.R_OK)
        if should_load:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
102
103
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch to the cookie extractor that matches ``browser_name``.

    ``keyring`` is only passed on to the Chromium-based extractor.
    Raises ValueError when the browser name is not recognised.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
113
114
def _extract_firefox_cookies(profile, logger):
    """Read all rows of ``moz_cookies`` from a Firefox profile into a cookie jar.

    ``profile`` may be None (search the whole Firefox directory), a path, or a
    name that is joined onto the Firefox directory. Returns an empty jar when
    sqlite3 is unavailable and raises FileNotFoundError when no
    ``cookies.sqlite`` is found.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is not None and _is_path(profile):
        search_root = profile
    else:
        firefox_dir = _firefox_browser_dir()
        search_root = firefox_dir if profile is None else os.path.join(firefox_dir, profile)

    database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # The query runs against a copy of the database placed in tmpdir
            cursor = _open_database_copy(database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    jar.set_cookie(compat_cookiejar_Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
156
157
def _firefox_browser_dir():
    """Return the per-platform directory that is searched for Firefox profiles.

    Raises ValueError on platforms other than Linux, Windows and macOS.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return os.path.expanduser('~/.mozilla/firefox')
    if platform == 'win32':
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    raise ValueError(f'unsupported platform: {sys.platform}')
167
168
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a browser.

    The result is a dict with the keys ``browser_dir``, ``keyring_name`` and
    ``supports_profiles``. Raises KeyError for a browser name that is not in
    the tables below and ValueError on an unsupported platform.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        config = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]
        browser_dir = os.path.join(config, relative_dir)

    elif platform == 'win32':
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        # Opera is the only entry rooted in the roaming directory
        base_dir, relative_dir = {
            'brave': (appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (appdata_local, R'Google\Chrome\User Data'),
            'chromium': (appdata_local, R'Chromium\User Data'),
            'edge': (appdata_local, R'Microsoft\Edge\User Data'),
            'opera': (appdata_roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (appdata_local, R'Vivaldi\User Data'),
        }[browser_name]
        browser_dir = os.path.join(base_dir, relative_dir)

    elif platform == 'darwin':
        appdata = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]
        browser_dir = os.path.join(appdata, relative_dir)

    else:
        raise ValueError(f'unsupported platform: {sys.platform}')

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = platform == 'darwin'
    keyring_names = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_names[browser_name],
        'supports_profiles': browser_name not in browsers_without_profiles
    }
226
227
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt the ``cookies`` table of a Chromium-based browser.

    ``profile`` may be None, a path, or a profile name. Returns an empty jar
    when sqlite3 is unavailable and raises FileNotFoundError when no
    ``Cookies`` database is found. Cookies that cannot be decrypted are
    counted and left out of the jar.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = settings['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # With profile support the browser root is the parent of the profile directory
        settings['browser_dir'] = os.path.dirname(profile) if settings['supports_profiles'] else profile
    elif settings['supports_profiles']:
        search_root = os.path.join(settings['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = settings['browser_dir']

    database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    decryptor = get_cookie_decryptor(
        settings['browser_dir'], settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            # Text columns come back as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            # The "secure" column is named either is_secure or secure
            secure_column = 'is_secure' if 'is_secure' in _get_column_names(cursor, 'cookies') else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            num_failed = 0
            num_unencrypted = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        num_failed += 1
                        continue
                    if not is_encrypted:
                        num_unencrypted += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({num_failed} could not be decrypted)' if num_failed > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = num_unencrypted
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
292
293
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the ``cookies`` table into a cookie object.

    The text fields arrive as bytes and are decoded here. Returns a pair
    ``(is_encrypted, cookie)``; ``cookie`` is None when decryption failed.
    ``is_encrypted`` is truthy only when ``value`` is empty and
    ``encrypted_value`` is not.
    """
    host_key, name, value, path = (
        field.decode() for field in (host_key, name, value, path))
    is_encrypted = not value and encrypted_value

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    cookie = compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
311
312
class ChromeCookieDecryptor:
    """
    Base class of the per-platform cookie decryptors.

    Summary of the storage formats:

    Linux:
    - a cookie is tagged v10 or v11
        - v10: AES-CBC, fixed key
        - v11: AES-CBC, key protected by the OS (keyring)
    - where the v11 key lives depends on the active desktop environment [2]

    Mac:
    - a cookie is tagged v10 or it is not
        - v10: AES-CBC, key protected by the OS (keyring), with more key derivation iterations than linux
        - anything else: 'old data' kept as plaintext

    Windows:
    - a cookie is tagged v10 or it is not
        - v10: AES-GCM, with a key that is itself DPAPI encrypted
        - anything else: DPAPI encrypted

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Per-version tally of the cookies passed to decrypt(); subclasses assign their own dict
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value; subclasses must override this."""
        raise NotImplementedError('Must be implemented by sub classes')
343
344
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor that matches the current platform.

    ``browser_root`` is only used on Windows, ``browser_keyring_name`` only on
    Linux and macOS, and ``keyring`` only on Linux. Raises NotImplementedError
    on any other platform.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}')
354
355
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts cookie values tagged ``v10`` or ``v11`` on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 cookies use a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        keyring_password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        if keyring_password is None:
            self._v11_key = None
        else:
            self._v11_key = self.derive_key(keyring_password)
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key for ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value, or None when it cannot be decrypted."""
        # The first three bytes carry the version tag
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        self._cookie_counts['other'] += 1
        return None
388
389
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts ``v10`` cookie values on macOS; other values are returned unchanged."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        keychain_password = _get_mac_keyring_password(browser_keyring_name, logger)
        if keychain_password is None:
            self._v10_key = None
        else:
            self._v10_key = self.derive_key(keychain_password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key for ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the cookie value, or None when a v10 value cannot be decrypted."""
        # The first three bytes carry the version tag
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
420
421
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts cookie values on Windows.

    ``v10`` values are AES-GCM encrypted with the key read from the browser's
    Local State file; every other value is handed to DPAPI directly.
    """

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value as ``str``, or None on failure."""
        # The first three bytes carry the version tag
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout of the payload: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            if plaintext is None:
                # Defensive: report a failed DPAPI call as "could not decrypt" (None),
                # like every other decrypt path, instead of raising AttributeError
                return None
            return plaintext.decode()
457
458
def _extract_safari_cookies(profile, logger):
    """Load the Safari ``Cookies.binarycookies`` file into a cookie jar.

    A non-None ``profile`` is reported as an error but otherwise ignored.
    Raises ValueError on platforms other than macOS and FileNotFoundError
    when neither known cookie location exists.
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    primary_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    if os.path.isfile(primary_path):
        cookies_path = primary_path
    else:
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookies_file:
        raw_cookies = cookies_file.read()

    jar = parse_safari_cookies(raw_cookies, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
479
480
class ParserError(Exception):
    """Raised by DataParser when the input cannot be read as requested."""
483
484
class DataParser:
    """Sequential reader over a block of binary data.

    ``cursor`` is the offset of the next unread byte. Reads beyond the end of
    the data and negative lengths raise ParserError.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and raise ParserError unless they match."""
        value = self.read_bytes(len(expected_value))
        if value == expected_value:
            return
        raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless ``big_endian``)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(byte_order + 'I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little-endian unless ``big_endian``)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(byte_order + 'd', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read up to and including the next NUL byte; return the decoded text before it."""
        # iter() with a sentinel keeps reading single bytes until the NUL shows up
        single_bytes = iter(lambda: self.read_bytes(1), b'\x00')
        return b''.join(single_bytes).decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance by ``num_bytes`` bytes, writing the skipped bytes to the debug log."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped!r}')

    def skip_to(self, offset, description='unknown'):
        """Advance the cursor to the absolute position ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Advance the cursor to the end of the data."""
        self.skip_to(len(self._data), description)
534
535
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into a whole-second POSIX timestamp."""
    reference_date = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = reference_date + timedelta(seconds=timestamp)
    return int(moment.timestamp())
538
539
def _parse_safari_cookies_header(data, logger):
    """Parse the file header of a Safari cookie file.

    Returns ``(page_sizes, offset)`` where ``page_sizes`` lists the size of
    each page and ``offset`` is the position just after the header.
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
546
547
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a Safari cookie file and add its cookies to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record is parsed from its own slice; afterwards the page
            # parser is moved past it
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
566
567
def _parse_safari_cookies_record(data, jar, logger):
    """Parse a single cookie record from the start of ``data`` and add it to ``jar``.

    Returns the record size read from the record header. A record whose
    strings are not valid UTF-8 is skipped with a warning.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        # The strings are visited in this fixed order: domain, name, path, value
        strings = []
        for string_offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(string_offset)
            strings.append(parser.read_cstring())
        domain, name, path, value = strings
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
608
609
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari cookie file into ``jar`` (a new jar when None).

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    target_jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, target_jar, logger)
    body.skip_to_end('footer')
    return target_jar
625
626
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished when picking a keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    # Returned by the detection code when nothing more specific matches
    OTHER = auto()
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
639
640
class _LinuxKeyring(Enum):
    """
    Keyring backends that can be selected on Linux.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    # No keyring password is looked up for this backend
    BASICTEXT = auto()
649
650
# Names of the members of _LinuxKeyring
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
652
653
def _get_linux_desktop_environment(env):
    """
    Work out the desktop environment from the environment mapping ``env``.

    XDG_CURRENT_DESKTOP is consulted first, then DESKTOP_SESSION, and only
    when DESKTOP_SESSION is unset the GNOME/KDE marker variables.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    session = env.get('DESKTOP_SESSION', None)
    if current_desktop is not None:
        # Only the first entry of a colon-separated list is considered
        current_desktop = current_desktop.split(':')[0].strip()

    if current_desktop == 'Unity':
        is_gnome_fallback = session is not None and 'gnome-fallback' in session
        if is_gnome_fallback:
            return _LinuxDesktopEnvironment.GNOME
        return _LinuxDesktopEnvironment.UNITY

    by_current_desktop = {
        'GNOME': _LinuxDesktopEnvironment.GNOME,
        'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
        'KDE': _LinuxDesktopEnvironment.KDE,
        'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
        'XFCE': _LinuxDesktopEnvironment.XFCE,
    }
    if current_desktop in by_current_desktop:
        return by_current_desktop[current_desktop]

    if session is not None:
        if session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in session:
            return _LinuxDesktopEnvironment.XFCE
        # A session is set but unrecognised: the marker variables are not consulted
        return _LinuxDesktopEnvironment.OTHER

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    if 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
692
693
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the desktop environment found in ``os.environ``.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    # Every remaining desktop environment maps to the GNOME keyring
    return _LinuxKeyring.GNOMEKEYRING
708
709
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Falls back to 'kdewallet' when the dbus query fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet'
    ]
    try:
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        stdout, stderr = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.decode().strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
739
740
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password from KWallet via ``kwallet-query``.

    Returns the password as bytes with one trailing newline removed. Returns
    ``b''`` when ``kwallet-query`` is missing, exits with a non-zero code,
    reports that the entry cannot be read, or raises.
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # The trailing space after "should be" keeps the joined message readable
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        proc = Popen([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.error(f'kwallet-query failed with return code {proc.returncode}. Please consult '
                         'the kwallet-query man page for details')
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this may be a bug as the intended behaviour is to generate a random password and store
            # it, but that doesn't matter here.
            return b''

        logger.debug('password found')
        # Drop a single trailing newline from the command output
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
784
785
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Return the secret of the item labelled "<name> Safe Storage" in the default collection.

    Returns ``b''`` when secretstorage is unavailable or no such item exists.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        # Reached only when no item carried the wanted label
        logger.error('failed to read from keyring')
        return b''
802
803
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Fetch the browser's keyring password from the selected Linux backend.

    ``keyring`` is the name of a _LinuxKeyring member; when it is falsy the
    backend is detected automatically. Returns None for the basic-text backend.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        selected = _LinuxKeyring[keyring]
    else:
        selected = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {selected.name}')

    if selected == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if selected == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if selected == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {selected}'
822
823
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Ask the ``security`` command for the browser's "Safe Storage" password.

    Returns the command output as bytes with one trailing newline removed, or
    None when running the command raises.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    command = [
        'security', 'find-generic-password',
        '-w',  # write password to stdout
        '-a', browser_keyring_name,  # match 'account'
        '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
    ]
    try:
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        stdout, stderr = proc.communicate_or_kill()
        return stdout[:-1] if stdout[-1:] == b'\n' else stdout
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
841
842
def _get_windows_v10_key(browser_root, logger):
    """Read the v10 cookie key from the browser's ``Local State`` file.

    The stored key is base64 decoded, checked for a ``DPAPI`` prefix and the
    remainder is handed to DPAPI. Returns None when the file is missing, the
    key entry is absent, or the prefix does not match.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        base64_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    encrypted_key = compat_b64decode(base64_key)
    marker = b'DPAPI'
    if not encrypted_key.startswith(marker):
        logger.error('invalid key')
        return None
    # Only the bytes after the marker are DPAPI encrypted
    return _decrypt_windows_dpapi(encrypted_key[len(marker):], logger)
862
863
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of `key_length` bytes from `password` and `salt` using PBKDF2 with HMAC-SHA1."""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
866
867
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """Decrypt `ciphertext` with AES-CBC, strip the PKCS#7 padding and decode the result as text.

    Returns the decoded string, or None (after logging a one-time warning) if the
    decrypted bytes are not valid UTF-8.
    """
    decrypted = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    unpadded = unpad_pkcs7(decrypted)
    try:
        decoded = unpadded.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        decoded = None
    return decoded
875
876
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """Decrypt and authenticate `ciphertext` with AES-GCM and decode the result as text.

    Returns the decoded string, or None (after logging a one-time warning) if
    either the tag verification or the UTF-8 decoding fails.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        failure = 'failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?'
    else:
        # Decoding happens outside the `try` above so that a UnicodeDecodeError
        # (a ValueError subclass) is not reported as a failed MAC check
        try:
            return decrypted.decode()
        except UnicodeDecodeError:
            failure = 'failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?'

    logger.warning(failure, only_once=True)
    return None
889
890
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling the Windows function CryptUnprotectData.

    Returns the decrypted bytes, or None (after logging a one-time warning) if
    the call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    # NOTE(review): create_string_buffer() appends a NUL byte, so the size passed
    # below is len(ciphertext) + 1 - presumably tolerated by DPAPI; confirm
    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()

    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if succeeded:
        # Copy the output into a Python bytes object before releasing the
        # buffer that was allocated for pDataOut
        plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
        ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
        return plaintext

    logger.warning('failed to decrypt with DPAPI', only_once=True)
    return None
921
922
923def _config_home():
924 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
925
926
927def _open_database_copy(database_path, tmpdir):
928 # cannot open sqlite databases if they are already in use (e.g. by the browser)
929 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
930 shutil.copy(database_path, database_copy_path)
931 conn = sqlite3.connect(database_copy_path)
932 return conn.cursor()
933
934
935def _get_column_names(cursor, table_name):
936 table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
937 return [row[1].decode() for row in table_info]
938
939
def _find_most_recently_used_file(root, filename, logger):
    """Search `root` recursively for files named `filename` and return the newest one.

    "Newest" is the path with the greatest modification time. Progress is reported
    through the progress bar created for `logger`. Returns None if nothing matches.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched, matches = 0, []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, names in os.walk(root):
            for name in names:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if name == filename:
                    matches.append(os.path.join(directory, name))

    if not matches:
        return None
    return max(matches, key=lambda candidate: os.lstat(candidate).st_mtime)
951
952
def _merge_cookie_jars(jars):
    """Combine the cookies of all `jars` into one new YoutubeDLCookieJar.

    Cookies are added in iteration order. The result takes the filename of the
    last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        if source_jar.filename is None:
            continue
        merged.filename = source_jar.filename
    return merged
961
962
963def _is_path(value):
964 return os.path.sep in value
965
966
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """Validate a browser specification and return it as (browser_name, profile, keyring).

    Raises ValueError if `browser_name` is not in SUPPORTED_BROWSERS, or if
    `keyring` is neither None nor in SUPPORTED_KEYRINGS. A `profile` that looks
    like a path has a leading `~` expanded; any other value is returned unchanged.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    looks_like_path = profile is not None and _is_path(profile)
    resolved_profile = os.path.expanduser(profile) if looks_like_path else profile
    return browser_name, resolved_profile, keyring