]> jfr.im git - yt-dlp.git/blob - yt_dlp/cookies.py
[extractor] Framework for embed detection (#4307)
[yt-dlp.git] / yt_dlp / cookies.py
1 import base64
2 import contextlib
3 import ctypes
4 import http.cookiejar
5 import json
6 import os
7 import shutil
8 import struct
9 import subprocess
10 import sys
11 import tempfile
12 import time
13 from datetime import datetime, timedelta, timezone
14 from enum import Enum, auto
15 from hashlib import pbkdf2_hmac
16
17 from .aes import (
18 aes_cbc_decrypt_bytes,
19 aes_gcm_decrypt_and_verify_bytes,
20 unpad_pkcs7,
21 )
22 from .dependencies import (
23 _SECRETSTORAGE_UNAVAILABLE_REASON,
24 secretstorage,
25 sqlite3,
26 )
27 from .minicurses import MultilinePrinter, QuietMultilinePrinter
28 from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
29
# Browser names that extract_cookies_from_browser routes to _extract_chrome_cookies.
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name accepted by _parse_browser_specification.
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
32
33
class YDLLogger:
    """Logging facade that forwards messages to an optional ``ydl`` object.

    Every method is a no-op when no (truthy) ``ydl`` object was supplied.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward `message` to ``ydl.write_debug``."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Forward `message`, prefixed with ``[Cookies]``, to ``ydl.to_screen``."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward `message` and the `only_once` flag to ``ydl.report_warning``."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward `message` to ``ydl.report_error``."""
        if not self._ydl:
            return
        self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        """Single-line progress printer that rate-limits its own output."""

        # minimum number of seconds between two printed updates
        _DELAY = 0.1
        # time.time() value of the last printed update
        _timer = 0

        def print(self, message):
            """Print `message` on line 0 unless the previous update was too recent."""
            elapsed = time.time() - self._timer
            if elapsed <= self._DELAY:
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        # Do not print to files/pipes, loggers, or when --no-progress is used
        ydl = self._ydl
        if not ydl:
            return None
        if ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None
        stream = ydl._out_files.error
        try:
            is_terminal = stream.isatty()
        except BaseException:
            return None
        if not is_terminal:
            return None
        return self.ProgressBar(stream, preserve_output=False)
74
75
def _create_progress_bar(logger):
    """Return a printer object that has a ``print`` method.

    The printer comes from ``logger.progress_bar()`` when the logger has that
    method and it returns something truthy; otherwise a QuietMultilinePrinter
    whose ``print`` discards its argument is returned.
    """
    printer = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not printer:
        printer = QuietMultilinePrinter()
        printer.print = lambda _: None
    return printer
84
85
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser specification and/or a cookie file.

    @param cookie_file            value handed to YoutubeDLCookieJar, or None
    @param browser_specification  arguments for _parse_browser_specification, or None
    @param ydl                    object wrapped in a YDLLogger for browser extraction
    @returns                      the result of merging all jars that were created
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        given_as_path = YoutubeDLCookieJar.is_path(cookie_file)
        if given_as_path:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # a path is only loaded when it is readable; non-path inputs are always loaded
        should_load = os.access(cookie_file, os.R_OK) if given_as_path else True
        if should_load:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
103
104
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch cookie extraction to the extractor matching `browser_name`.

    `keyring` is only passed on to the Chromium-based extractor.
    Raises ValueError for a browser name that no extractor handles.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
114
115
def _extract_firefox_cookies(profile, logger):
    """Read all rows of ``moz_cookies`` from a firefox profile into a cookie jar.

    `profile` may be None (search the default firefox directory), a path
    (searched directly) or a name relative to the default firefox directory.
    An empty jar is returned when sqlite3 is unavailable; FileNotFoundError is
    raised when no ``cookies.sqlite`` is found under the search root.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # work on a copy of the database placed inside the temporary directory
            cursor = _open_database_copy(database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    jar.set_cookie(http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
157
158
159 def _firefox_browser_dir():
160 if sys.platform in ('cygwin', 'win32'):
161 return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
162 elif sys.platform == 'darwin':
163 return os.path.expanduser('~/Library/Application Support/Firefox')
164 return os.path.expanduser('~/.mozilla/firefox')
165
166
def _get_chromium_based_browser_settings(browser_name):
    """Return the per-browser settings used for Chromium cookie extraction.

    The result is a dict with the keys ``browser_dir`` (platform-specific data
    directory), ``keyring_name`` and ``supports_profiles``.
    A KeyError is raised for a browser name missing from the tables below.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    on_mac = sys.platform == 'darwin'

    if sys.platform in ('cygwin', 'win32'):
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        base, relative = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
        }[browser_name]

    elif on_mac:
        base = os.path.expanduser('~/Library/Application Support')
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]

    else:
        base = _config_home()
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]

    browser_dir = os.path.join(base, relative)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }[browser_name]

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        # opera is the only listed browser without profile support
        'supports_profiles': browser_name != 'opera',
    }
221
222
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt the ``cookies`` table of a Chromium-based browser.

    `profile` may be None, a path, or a profile name; a name is only honoured
    for browsers whose settings report profile support. An empty jar is
    returned when sqlite3 is unavailable; FileNotFoundError is raised when no
    ``Cookies`` database is found. Rows for which no cookie could be built are
    counted and mentioned in the summary message.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    profiles_supported = settings['supports_profiles']

    if profile is None:
        search_root = settings['browser_dir']
    elif _is_path(profile):
        search_root = profile
        settings['browser_dir'] = os.path.dirname(profile) if profiles_supported else profile
    elif profiles_supported:
        search_root = os.path.join(settings['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = settings['browser_dir']

    database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    decryptor = get_cookie_decryptor(
        settings['browser_dir'], settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            # text columns are returned as bytes and decoded per cookie
            cursor.connection.text_factory = bytes
            columns = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in columns else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_count = 0
            plain_count = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total: 6d}')
                    was_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_count += 1
                        continue
                    if not was_encrypted:
                        plain_count += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed_count} could not be decrypted)' if failed_count > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = plain_count
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
287
288
289 def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
290 host_key = host_key.decode()
291 name = name.decode()
292 value = value.decode()
293 path = path.decode()
294 is_encrypted = not value and encrypted_value
295
296 if is_encrypted:
297 value = decryptor.decrypt(encrypted_value)
298 if value is None:
299 return is_encrypted, None
300
301 return is_encrypted, http.cookiejar.Cookie(
302 version=0, name=name, value=value, port=None, port_specified=False,
303 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
304 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
305 comment=None, comment_url=None, rest={})
306
307
class ChromeCookieDecryptor:
    """
    Base class of the platform-specific chrome cookie decryptors.

    Summary of the encryption schemes per platform:

    Linux:
    - cookies are either v10 or v11
        - v10: AES-CBC encrypted with a fixed key
        - v11: AES-CBC encrypted with an OS protected key (keyring)
        - v11 keys can be stored in various places depending on the active desktop environment [2]

    Mac:
    - cookies are either v10 or not v10
        - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
        - not v10: 'old data' stored as plaintext

    Windows:
    - cookies are either v10 or not v10
        - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
        - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # per-version cookie counters; each subclass replaces this in its __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt a single cookie value; subclasses must override this."""
        raise NotImplementedError('Must be implemented by sub classes')
338
339
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor matching the current ``sys.platform``.

    `browser_root` is only used by the Windows decryptor, `browser_keyring_name`
    by the Mac and Linux ones, and `keyring` only by the Linux one.
    """
    platform = sys.platform
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
346
347
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for v10 (fixed key) and v11 (keyring key) cookies on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 cookies always use a key derived from a hard-coded password
        self._v10_key = self.derive_key(b'peanuts')
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = dict.fromkeys(('v10', 'v11', 'other'), 0)

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from `password`."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value, or None if it cannot be decrypted.

        The first three bytes select the scheme; the rest is the ciphertext.
        """
        prefix, payload = encrypted_value[:3], encrypted_value[3:]

        if prefix not in (b'v10', b'v11'):
            self._cookie_counts['other'] += 1
            return None

        if prefix == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(payload, self._v10_key, self._logger)

        self._cookie_counts['v11'] += 1
        if self._v11_key is None:
            self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(payload, self._v11_key, self._logger)
380
381
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for chrome cookies on macOS.

    v10 values are AES-CBC encrypted with a key derived from the keychain
    password; anything else is treated as plaintext 'old data'.
    """

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = None if password is None else self.derive_key(password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from `password`."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the cookie value as str, or None if it cannot be recovered."""
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            # The raw column value is bytes; decode it so that the result has the
            # same type (str) as every other decrypt path instead of putting a
            # bytes object into the cookie jar.
            try:
                return encrypted_value.decode()
            except UnicodeDecodeError:
                self._logger.warning(
                    'failed to read plaintext cookie because UTF-8 decoding failed', only_once=True)
                return None
412
413
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for chrome cookies on Windows.

    v10 values are AES-GCM encrypted with the key read from the browser's
    Local State file; anything else is decrypted directly with DPAPI.
    """

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Return the cookie value as str, or None if it cannot be decrypted."""
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # layout of the payload: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            if plaintext is None:
                # _decrypt_windows_dpapi already logged the failure; calling
                # .decode() on None would raise AttributeError
                return None
            try:
                return plaintext.decode()
            except UnicodeDecodeError:
                self._logger.warning(
                    'failed to decrypt cookie (DPAPI) because UTF-8 decoding failed', only_once=True)
                return None
449
450
def _extract_safari_cookies(profile, logger):
    """Load the safari ``Cookies.binarycookies`` file into a cookie jar.

    A non-None `profile` is reported as an error but otherwise ignored.
    Raises ValueError on platforms other than darwin and FileNotFoundError
    when neither known cookie location exists.
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    primary_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    if os.path.isfile(primary_path):
        cookies_path = primary_path
    else:
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookies_file:
        raw_cookies = cookies_file.read()

    jar = parse_safari_cookies(raw_cookies, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
471
472
class ParserError(Exception):
    """Raised by DataParser for invalid reads/skips and unexpected values."""
475
476
class DataParser:
    """Sequential reader over a bytes object that tracks a cursor position.

    All reads advance ``cursor``; ParserError is raised for negative sizes,
    reads past the end of the data and unexpected values.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next `num_bytes` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and require them to equal `expected_value`."""
        actual = self.read_bytes(len(expected_value))
        if actual == expected_value:
            return
        raise ParserError(f'unexpected value: {actual} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless `big_endian`)."""
        (number,) = struct.unpack('>I' if big_endian else '<I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8 byte double (little endian unless `big_endian`)."""
        (number,) = struct.unpack('>d' if big_endian else '<d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read bytes up to and including the next NUL byte and decode what precedes it."""
        # iter() with a sentinel keeps reading single bytes until b'\x00' is returned
        chunks = list(iter(lambda: self.read_bytes(1), b'\x00'))
        return b''.join(chunks).decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume `num_bytes` bytes, writing the skipped bytes to the debug log."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped!r}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward until the cursor is at `offset`."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that remains in the data."""
        self.skip_to(len(self._data), description)
526
527
528 def _mac_absolute_time_to_posix(timestamp):
529 return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
530
531
def _parse_safari_cookies_header(data, logger):
    """Parse the file header of a safari cookies database.

    @returns  (list of page sizes, offset at which the header ends)
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
538
539
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a safari cookies database and add its cookies to `jar`."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    cookie_count = parser.read_uint()
    offsets = [parser.read_uint() for _ in range(cookie_count)]
    if not offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{cookie_count: 6d}')
            parser.skip_to(offset, 'space between records')
            # the record parser reports its size so this parser can move past it
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
558
559
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of `data` and add the cookie to `jar`.

    A record whose strings are not valid UTF-8 is skipped with a warning.

    @returns  the record size read from the record's first field
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    domain_offset, name_offset, path_offset, value_offset = (parser.read_uint() for _ in range(4))
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # read to advance the cursor; the value itself is not used
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        strings = []
        for offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
600
601
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a safari cookies database into `jar`.

    A new YoutubeDLCookieJar is created when `jar` is None; the jar is returned.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
617
618
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished by _get_linux_desktop_environment.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    # returned when none of the environment checks match
    OTHER = auto()
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
631
632
class _LinuxKeyring(Enum):
    """
    Keyring backends that _get_linux_keyring_password can read from.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    # password is read with _get_kwallet_password
    KWALLET = auto()
    # password is read with _get_gnome_keyring_password
    GNOMEKEYRING = auto()
    # no keyring password is looked up for this backend
    BASICTEXT = auto()
641
642
# Member names of _LinuxKeyring; _parse_browser_specification validates the keyring argument against these.
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
644
645
def _get_linux_desktop_environment(env):
    """
    Determine the desktop environment from the mapping of environment variables `env`.

    XDG_CURRENT_DESKTOP (first ':' separated entry) is checked first, then
    DESKTOP_SESSION, and only when DESKTOP_SESSION is unset the legacy
    GNOME_DESKTOP_SESSION_ID / KDE_FULL_SESSION variables.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)
    if xdg_current_desktop is not None:
        xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()

    if xdg_current_desktop == 'Unity':
        is_gnome_fallback = desktop_session is not None and 'gnome-fallback' in desktop_session
        return _LinuxDesktopEnvironment.GNOME if is_gnome_fallback else _LinuxDesktopEnvironment.UNITY

    environments_by_xdg_name = {
        'GNOME': _LinuxDesktopEnvironment.GNOME,
        'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
        'KDE': _LinuxDesktopEnvironment.KDE,
        'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
        'XFCE': _LinuxDesktopEnvironment.XFCE,
    }
    if xdg_current_desktop in environments_by_xdg_name:
        return environments_by_xdg_name[xdg_current_desktop]

    if desktop_session is None:
        if 'GNOME_DESKTOP_SESSION_ID' in env:
            return _LinuxDesktopEnvironment.GNOME
        if 'KDE_FULL_SESSION' in env:
            return _LinuxDesktopEnvironment.KDE
    elif desktop_session in ('mate', 'gnome'):
        return _LinuxDesktopEnvironment.GNOME
    elif 'kde' in desktop_session:
        return _LinuxDesktopEnvironment.KDE
    elif 'xfce' in desktop_session:
        return _LinuxDesktopEnvironment.XFCE
    return _LinuxDesktopEnvironment.OTHER
684
685
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the detected desktop environment.

    KDE selects KWALLET, an unrecognised environment selects BASICTEXT and
    everything else selects GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop.name}')
    if desktop == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
700
701
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Queried with dbus-send; 'kdewallet' is returned when the command fails or
    an exception occurs.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet',
    ]
    try:
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        wallet = stdout.strip()
        logger.debug(f'NetworkWallet = "{wallet}"')
        return wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
729
730
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's 'Safe Storage' password from KWallet via kwallet-query.

    @returns  the password as bytes, or b'' when kwallet-query is missing,
              fails, reports that the entry cannot be read, or raises
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # note the trailing spaces: the adjacent literals are concatenated verbatim
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this may be a bug as the intended behaviour is to generate a random password and store
                # it, but that doesn't matter here.
                return b''
            else:
                logger.debug('password found')
                return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
771
772
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Read the browser's 'Safe Storage' secret from the default secretstorage collection.

    @returns  the secret, or b'' when secretstorage is unavailable or no item
              with the expected label exists
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        logger.error('failed to read from keyring')
        return b''
789
790
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Return the keyring password for the browser, or None for the basic-text backend.

    `keyring` is the name of a _LinuxKeyring member; when it is falsy the
    backend is chosen by _choose_linux_keyring.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        selected = _LinuxKeyring[keyring]
    else:
        selected = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {selected.name}')

    if selected == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if selected == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if selected == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {selected}'
809
810
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Read the browser's 'Safe Storage' password from the macOS keychain.

    Runs ``security find-generic-password`` and returns its stdout without the
    trailing newline.

    @returns  the password as bytes, or None when the command exits with a
              non-zero status or raises
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        stdout, _, returncode = Popen.run(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if returncode:
            # without this check a failed lookup yields b'' and a key would be
            # derived from an empty password instead of reporting "no key found"
            logger.warning('find-generic-password failed')
            return None
        return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
824
825
def _get_windows_v10_key(browser_root, logger):
    """Return the DPAPI-decrypted v10 cookie key from the browser's ``Local State`` file.

    @returns  the result of _decrypt_windows_dpapi, or None when the file or
              key is missing or the key lacks the expected ``DPAPI`` prefix
    """
    path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{path}"')

    with open(path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    encrypted_key = base64.b64decode(encoded_key)
    prefix = b'DPAPI'
    if not encrypted_key.startswith(prefix):
        logger.error('invalid key')
        return None
    # everything after the prefix is the DPAPI-protected key
    protected_key = encrypted_key[len(prefix):]
    return _decrypt_windows_dpapi(protected_key, logger)
845
846
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive `key_length` bytes from `password` with PBKDF2-HMAC-SHA1."""
    return pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
849
850
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """AES-CBC decrypt `ciphertext`, strip the PKCS#7 padding and decode the result.

    @returns  the plaintext as str, or None (after a warning) if it is not valid UTF-8
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    plaintext = unpad_pkcs7(padded)
    try:
        decoded = plaintext.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return decoded
858
859
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """AES-GCM decrypt and verify `ciphertext`, then decode the result.

    @returns  the plaintext as str, or None (after a warning) when the
              decryption helper raises ValueError or the plaintext is not valid UTF-8
    """
    try:
        plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        decoded = plaintext.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return decoded
872
873
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling CryptUnprotectData from crypt32 through ctypes.

    @param ciphertext  the protected data as bytes
    @param logger      receives a warning when the call reports failure
    @returns           the decrypted bytes, or None if CryptUnprotectData returned a false value

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    # imported here because ctypes.wintypes is only needed on this Windows-only code path
    from ctypes.wintypes import DWORD

    # ctypes description of the structure passed to and filled in by CryptUnprotectData
    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    # NOTE(review): ctypes.create_string_buffer(bytes) allocates one extra NUL byte, so
    # sizeof(buffer) is presumably len(ciphertext) + 1 - confirm that this is intended
    buffer = ctypes.create_string_buffer(ciphertext)
    blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
    blob_out = DATA_BLOB()
    ret = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(blob_in),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(blob_out)  # pDataOut
    )
    if not ret:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before releasing the output buffer
    result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
    ctypes.windll.kernel32.LocalFree(blob_out.pbData)
    return result
904
905
906 def _config_home():
907 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
908
909
910 def _open_database_copy(database_path, tmpdir):
911 # cannot open sqlite databases if they are already in use (e.g. by the browser)
912 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
913 shutil.copy(database_path, database_copy_path)
914 conn = sqlite3.connect(database_copy_path)
915 return conn.cursor()
916
917
918 def _get_column_names(cursor, table_name):
919 table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
920 return [row[1].decode() for row in table_info]
921
922
def _find_most_recently_used_file(root, filename, logger):
    """Return the most recently modified file named `filename` below `root`.

    @returns  the path with the largest modification time, or None if there is no match
    """
    # if there are multiple browser profiles, take the most recently used one
    searched = 0
    matches = []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _subdirs, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry == filename:
                    matches.append(os.path.join(directory, entry))
    if not matches:
        return None
    return max(matches, key=lambda path: os.lstat(path).st_mtime)
934
935
def _merge_cookie_jars(jars):
    """Copy the cookies of all `jars`, in order, into a new YoutubeDLCookieJar.

    The merged jar takes the filename of the last jar that has one.
    """
    merged = YoutubeDLCookieJar()
    for source in jars:
        for cookie in source:
            merged.set_cookie(cookie)
        if source.filename is not None:
            merged.filename = source.filename
    return merged
944
945
946 def _is_path(value):
947 return os.path.sep in value
948
949
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """Validate a browser specification and expand ``~`` in a profile given as a path.

    Raises ValueError for a browser name not in SUPPORTED_BROWSERS or a
    keyring that is neither None nor in SUPPORTED_KEYRINGS.

    @returns  (browser_name, profile, keyring)
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    profile_is_path = profile is not None and _is_path(profile)
    if profile_is_path:
        profile = os.path.expanduser(profile)
    return browser_name, profile, keyring