]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[cleanup] Mark some compat variables for removal (#2173)
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
10from datetime import datetime, timedelta, timezone
f59f5ef8 11from enum import Enum, auto
982ee69a
MB
12from hashlib import pbkdf2_hmac
13
1d3586d0 14from .aes import (
15 aes_cbc_decrypt_bytes,
16 aes_gcm_decrypt_and_verify_bytes,
17 unpad_pkcs7,
18)
09906f55 19from .compat import (
982ee69a
MB
20 compat_b64decode,
21 compat_cookiejar_Cookie,
22)
97ec5bc5 23from .minicurses import MultilinePrinter, QuietMultilinePrinter
09906f55 24from .utils import (
a44ca5a4 25 error_to_str,
982ee69a 26 expand_path,
d3c93ec2 27 Popen,
982ee69a
MB
28 YoutubeDLCookieJar,
29)
30
767b02a9
MB
31try:
32 import sqlite3
33 SQLITE_AVAILABLE = True
34except ImportError:
35 # although sqlite3 is part of the standard library, it is possible to compile python without
36 # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
37 SQLITE_AVAILABLE = False
38
39
982ee69a 40try:
f59f5ef8
MB
41 import secretstorage
42 SECRETSTORAGE_AVAILABLE = True
982ee69a 43except ImportError:
f59f5ef8
MB
44 SECRETSTORAGE_AVAILABLE = False
45 SECRETSTORAGE_UNAVAILABLE_REASON = (
46 'as the `secretstorage` module is not installed. '
47 'Please install by running `python3 -m pip install secretstorage`.')
063c409d 48except Exception as _err:
f59f5ef8
MB
49 SECRETSTORAGE_AVAILABLE = False
50 SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
982ee69a
MB
51
52
# Browser names that are routed through the Chromium extraction path (_extract_chrome_cookies).
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name that extract_cookies_from_browser() knows how to dispatch.
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
55
56
class YDLLogger:
    """Logging facade that forwards to a YoutubeDL-like object.

    When constructed without ``ydl`` every logging method is a silent no-op
    and no progress bar is offered.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward ``message`` to ``ydl.write_debug``."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Print ``message`` via ``ydl.to_screen`` with a ``[Cookies]`` prefix."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward ``message`` and ``only_once`` to ``ydl.report_warning``."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward ``message`` to ``ydl.report_error``."""
        if not self._ydl:
            return
        self._ydl.report_error(message)

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        ydl = self._ydl
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not ydl or ydl.params.get('noprogress') or ydl.params.get('logger'):
            return
        file = ydl._out_files['error']
        try:
            on_terminal = bool(file.isatty())
        except BaseException:
            # Any failure while probing the stream means "no progress bar".
            return
        if not on_terminal:
            return

        printer = MultilinePrinter(file, preserve_output=False)
        printer.print = lambda message: printer.print_at_line(f'[Cookies] {message}', 0)
        return printer
92
93
def _create_progress_bar(logger):
    """Return the logger's progress printer, or a silent stand-in.

    ``progress_bar`` is optional on loggers; when it is missing or returns a
    falsy value, a QuietMultilinePrinter whose ``print`` discards its argument
    is returned instead.
    """
    printer = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not printer:
        printer = QuietMultilinePrinter()
        printer.print = lambda _: None
    return printer
102
982ee69a
MB
103
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file            path of a cookie file, or None to skip it
    @param browser_specification  arguments for _parse_browser_specification, or None to skip it
    @param ydl                    object handed to YDLLogger for log output
    @returns                      the result of merging every jar that was collected
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        cookie_file = expand_path(cookie_file)
        file_jar = YoutubeDLCookieJar(cookie_file)
        # A file that cannot be read still contributes its (unloaded) jar.
        if os.access(cookie_file, os.R_OK):
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
118
119
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch cookie extraction to the extractor matching ``browser_name``.

    ``keyring`` is only passed on to the Chromium extractor.
    Raises ValueError for a browser name that has no extractor.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError('unknown browser: {}'.format(browser_name))
129
130
def _extract_firefox_cookies(profile, logger):
    """Read every row of ``moz_cookies`` from a Firefox profile into a cookie jar.

    ``profile`` may be None (search the whole Firefox directory), a path, or a
    name that is joined onto the Firefox directory. Returns an empty jar when
    sqlite3 is unavailable; raises FileNotFoundError when no database is found.
    """
    logger.info('Extracting cookies from firefox')
    if not SQLITE_AVAILABLE:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if database_path is None:
        raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root))
    logger.debug('Extracting cookies from: "{}"'.format(database_path))

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # The query runs against a copy of the database placed in the temporary directory.
            cursor = _open_database_copy(database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, row in enumerate(table):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    jar.set_cookie(compat_cookiejar_Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info('Extracted {} cookies from firefox'.format(len(jar)))
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
172
173
def _firefox_browser_dir():
    """Return the per-platform directory under which Firefox data is searched.

    Raises ValueError on platforms other than Linux, Windows and macOS.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return os.path.expanduser('~/.mozilla/firefox')
    if platform == 'win32':
        return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles')
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    raise ValueError('unsupported platform: {}'.format(platform))
183
184
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a Chromium browser.

    The result is a dict with the keys ``browser_dir``, ``keyring_name`` and
    ``supports_profiles``. Raises ValueError on an unsupported platform and
    KeyError for a browser name missing from the tables below.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        base = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]

    elif platform == 'win32':
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        # Opera is the only entry that lives under the roaming profile.
        base, relative_dir = {
            'brave': (local, r'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, r'Google\Chrome\User Data'),
            'chromium': (local, r'Chromium\User Data'),
            'edge': (local, r'Microsoft\Edge\User Data'),
            'opera': (roaming, r'Opera Software\Opera Stable'),
            'vivaldi': (local, r'Vivaldi\User Data'),
        }[browser_name]

    elif platform == 'darwin':
        base = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]

    else:
        raise ValueError('unsupported platform: {}'.format(platform))

    browser_dir = os.path.join(base, relative_dir)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }[browser_name]

    browsers_without_profiles = {'opera'}

    settings = {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles
    }
    return settings
242
243
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt the ``cookies`` table of a Chromium-based browser into a jar.

    ``profile`` may be None, a path, or a profile name (names are only honoured
    for browsers that support profiles). Returns an empty jar when sqlite3 is
    unavailable; raises FileNotFoundError when no ``Cookies`` database is found.
    Rows that cannot be turned into a cookie are counted and skipped.
    """
    logger.info('Extracting cookies from {}'.format(browser_name))

    if not SQLITE_AVAILABLE:
        logger.warning(('Cannot extract cookies from {} without sqlite3 support. '
                        'Please use a python interpreter compiled with sqlite3 support').format(browser_name))
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # An explicit path also replaces the browser directory handed to the decryptor.
        config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
    elif config['supports_profiles']:
        search_root = os.path.join(config['browser_dir'], profile)
    else:
        logger.error('{} does not support profiles'.format(browser_name))
        search_root = config['browser_dir']

    database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if database_path is None:
        raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root))
    logger.debug('Extracting cookies from: "{}"'.format(database_path))

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            # Text columns come back as bytes; _process_chrome_cookie decodes them.
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute('SELECT host_key, name, value, encrypted_value, path, '
                           'expires_utc, {} FROM cookies'.format(secure_column))
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, row in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            failed_message = ' ({} could not be decrypted)'.format(failed_cookies) if failed_cookies > 0 else ''
            logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message))
            counts = decryptor.cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug('cookie version breakdown: {}'.format(counts))
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
309
310
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the Chromium ``cookies`` table into a cookie object.

    The text columns arrive as UTF-8 bytes. A row counts as encrypted when its
    plain ``value`` is empty and ``encrypted_value`` is not.

    @returns  ``(is_encrypted, cookie)``; ``cookie`` is None when decryption fails
    """
    host_key, name, value, path = (
        column.decode('utf-8') for column in (host_key, name, value, path))
    is_encrypted = not value and encrypted_value

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    cookie = compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
328
329
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Interface implemented by the per-platform Chromium cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    def decrypt(self, encrypted_value):
        """Decrypt one ``encrypted_value``; must be overridden by subclasses."""
        raise NotImplementedError

    @property
    def cookie_counts(self):
        """Per-version counters of the values passed to decrypt(); must be overridden by subclasses."""
        raise NotImplementedError
362
982ee69a 363
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor for the current platform.

    ``keyring`` is only used on Linux and ``browser_root`` only on Windows.
    Raises NotImplementedError on any other platform.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError('Chrome cookie decryption is not supported '
                              'on this platform: {}'.format(platform))
374
375
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts ``v10``/``v11`` prefixed Chromium cookie values on Linux.

    The v10 key is derived from a fixed password; the v11 key is derived from
    the keyring password and is None when no password was obtained.
    """

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._v10_key = self.derive_key(b'peanuts')
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    @property
    def cookie_counts(self):
        """Number of values seen by decrypt(), keyed by version prefix."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None for an unknown prefix or a missing v11 key."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        self._cookie_counts['other'] += 1
        return None
412
413
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts ``v10`` prefixed Chromium cookie values on macOS.

    The key is derived from the keyring password and is None when no password
    was obtained. Values with any other prefix are returned unchanged.
    """

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    @property
    def cookie_counts(self):
        """Number of values seen by decrypt(), keyed by version prefix."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted value, None when the v10 key is missing, or the input for other prefixes."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
448
449
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts Chromium cookie values on Windows.

    Values prefixed with ``v10`` are AES-GCM encrypted with the key returned by
    ``_get_windows_v10_key``; every other value is passed to the DPAPI helper.
    """

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @property
    def cookie_counts(self):
        """Number of values seen by decrypt(), keyed by version prefix."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None when it cannot be decrypted."""
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the version prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        self._cookie_counts['other'] += 1
        # any other prefix means the data is DPAPI encrypted
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
        plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
        # NOTE(review): the DPAPI helper is defined outside this view. Guard against it
        # reporting failure with None so that `.decode` cannot raise AttributeError;
        # None is what the other failure paths of decrypt() already return.
        if plaintext is None:
            return None
        return plaintext.decode('utf-8')
489
490
def _extract_safari_cookies(profile, logger):
    """Load the Safari ``Cookies.binarycookies`` file into a cookie jar.

    A non-None ``profile`` is only reported as an error. Raises ValueError
    off macOS and FileNotFoundError when neither known location has the file.
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError('unsupported platform: {}'.format(sys.platform))

    primary_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    secondary_path = os.path.expanduser(
        '~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')

    cookies_path = primary_path
    if not os.path.isfile(primary_path):
        logger.debug('Trying secondary cookie location')
        if not os.path.isfile(secondary_path):
            raise FileNotFoundError('could not find safari cookies database')
        cookies_path = secondary_path

    with open(cookies_path, 'rb') as f:
        cookies_data = f.read()

    jar = parse_safari_cookies(cookies_data, logger=logger)
    logger.info('Extracted {} cookies from safari'.format(len(jar)))
    return jar
511
512
class ParserError(Exception):
    """Raised by DataParser for reads past the end, negative sizes, or unexpected bytes."""
    pass
515
516
class DataParser:
    """Cursor-based reader over an in-memory byte string.

    ``cursor`` is the offset of the next unread byte; every ``read_*`` and
    ``skip*`` call advances it. Reading past the end raises ParserError.
    ``data`` is expected to be ``bytes`` or ``bytearray`` (``read_cstring``
    relies on ``find``).
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and move the cursor past them."""
        if num_bytes < 0:
            raise ParserError('invalid read of {} bytes'.format(num_bytes))
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and raise ParserError unless they match."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError('unexpected value: {} != {} ({})'.format(value, expected_value, message))

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless ``big_endian``)."""
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little-endian unless ``big_endian``)."""
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        """Read a NUL-terminated UTF-8 string and move the cursor past the terminator.

        Raises ParserError when no terminator is left in the input and
        UnicodeDecodeError when the bytes are not valid UTF-8 (the cursor has
        already moved past the terminator in that case).
        """
        # Locate the terminator in one pass instead of reading one byte per call.
        terminator = self._data.find(b'\x00', self.cursor)
        if terminator < 0:
            # Same end state as reading byte by byte: the rest of the input is consumed.
            self.cursor = max(self.cursor, len(self._data))
            raise ParserError('reached end of input')
        raw = self._data[self.cursor:terminator]
        self.cursor = terminator + 1
        return raw.decode('utf-8')

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes and log them at debug level; zero is a no-op."""
        if num_bytes > 0:
            self._logger.debug('skipping {} bytes ({}): {}'.format(
                num_bytes, description, self.read_bytes(num_bytes)))
        elif num_bytes < 0:
            raise ParserError('invalid skip of {} bytes'.format(num_bytes))

    def skip_to(self, offset, description='unknown'):
        """Skip forward to absolute ``offset``; an offset behind the cursor raises ParserError."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the input."""
        self.skip_to(len(self._data), description)
567
568
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into a whole-second POSIX timestamp."""
    reference = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = reference + timedelta(seconds=timestamp)
    return int(moment.timestamp())
571
572
def _parse_safari_cookies_header(data, logger):
    """Parse the file header: ``cook`` signature, page count, then one big-endian size per page.

    @returns  ``(page_sizes, offset of the first byte after the header)``
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
579
580
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of the Safari cookie file and add its cookies to ``jar``.

    A page holds a signature, the cookie count, one little-endian record offset
    per cookie, and then the records themselves.
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if number_of_cookies == 0:
        logger.debug('a cookies page of size {} has no cookies'.format(len(data)))
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for i, record_offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(record_offset, 'space between records')
            # The record parser works on its own view; afterwards step over the record here.
            consumed = _parse_safari_cookies_record(data[record_offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
599
600
def _parse_safari_cookies_record(data, jar, logger):
    """Parse one cookie record starting at ``data[0]`` and add the cookie to ``jar``.

    A record whose strings are not valid UTF-8 is skipped with a warning.

    @returns  the record size read from the record's first field
    """
    reader = DataParser(data, logger)
    record_size = reader.read_uint()
    reader.skip(4, 'unknown record field 1')
    flags = reader.read_uint()
    is_secure = bool(flags & 0x0001)
    reader.skip(4, 'unknown record field 2')
    # Offsets of the four strings, stored in this order: domain, name, path, value.
    string_offsets = [reader.read_uint() for _ in range(4)]
    reader.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(reader.read_double())
    _creation_date = _mac_absolute_time_to_posix(reader.read_double())  # noqa: F841

    strings = []
    try:
        for offset in string_offsets:
            reader.skip_to(offset)
            strings.append(reader.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    reader.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
641
642
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari binary cookie file into ``jar`` (a new jar when None).

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    if jar is None:
        jar = YoutubeDLCookieJar()
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for page_size in page_sizes:
        page = body.read_bytes(page_size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
658
659
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments told apart by _get_linux_desktop_environment.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    OTHER = auto()  # returned when no known environment is detected
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
982ee69a
MB
672
673
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends that _get_linux_keyring_password can read from.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()  # no keyring password is looked up for this backend


# Member names of _LinuxKeyring; an explicit keyring choice is looked up by one of these names.
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
685
686
def _get_linux_desktop_environment(env):
    """
    Guess the desktop environment from a mapping of environment variables.

    ``XDG_CURRENT_DESKTOP`` (first ``:``-separated entry) is checked first, then
    ``DESKTOP_SESSION``; the legacy GNOME/KDE variables are only consulted when
    ``DESKTOP_SESSION`` is absent.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    current = env.get('XDG_CURRENT_DESKTOP', None)
    session = env.get('DESKTOP_SESSION', None)
    if current is not None:
        current = current.split(':')[0].strip()

    if current == 'Unity':
        uses_gnome_fallback = session is not None and 'gnome-fallback' in session
        if uses_gnome_fallback:
            return _LinuxDesktopEnvironment.GNOME
        return _LinuxDesktopEnvironment.UNITY

    by_current_desktop = {
        'GNOME': _LinuxDesktopEnvironment.GNOME,
        'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
        'KDE': _LinuxDesktopEnvironment.KDE,
        'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
        'XFCE': _LinuxDesktopEnvironment.XFCE,
    }
    if current in by_current_desktop:
        return by_current_desktop[current]

    if session is not None:
        if session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in session:
            return _LinuxDesktopEnvironment.XFCE
    elif 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
725
726
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the detected desktop environment:
    KDE uses KWallet, an unrecognised environment uses basic text,
    everything else uses the GNOME keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug('detected desktop environment: {}'.format(desktop_environment.name))
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
741
742
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Asks kwalletd5 over dbus and falls back to ``kdewallet`` when the call
    fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    try:
        command = [
            'dbus-send', '--session', '--print-reply=literal',
            '--dest=org.kde.kwalletd5',
            '/modules/kwalletd5',
            'org.kde.KWallet.networkWallet'
        ]
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.decode('utf-8').strip()
        logger.debug('NetworkWallet = "{}"'.format(network_wallet))
        return network_wallet
    except Exception as e:
        logger.warning('exception while obtaining NetworkWallet: {}'.format(e))
        return default_wallet
772
773
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the ``<browser> Safe Storage`` password from KWallet via ``kwallet-query``.

    @returns  the password as bytes (one trailing newline removed), or ``b''`` when
              ``kwallet-query`` is missing, fails, reports no entry, or raises
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # Each fragment ends with a space so the concatenated message reads correctly.
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        proc = Popen([
            'kwallet-query',
            '--read-password', '{} Safe Storage'.format(browser_keyring_name),
            '--folder', '{} Keys'.format(browser_keyring_name),
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.error('kwallet-query failed with return code {}. Please consult '
                         'the kwallet-query man page for details'.format(proc.returncode))
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this may be a bug as the intended behaviour is to generate a random password and store
            # it, but that doesn't matter here.
            return b''

        logger.debug('password found')
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
817
818
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Return the secret of the ``<browser> Safe Storage`` item in the default collection.

    Returns ``b''`` when secretstorage is unavailable or no item has that label.
    """
    if not SECRETSTORAGE_AVAILABLE:
        logger.error('secretstorage not available {}'.format(SECRETSTORAGE_UNAVAILABLE_REASON))
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        wanted_label = '{} Safe Storage'.format(browser_keyring_name)
        for entry in collection.get_all_items():
            if entry.get_label() == wanted_label:
                return entry.get_secret()
        logger.error('failed to read from keyring')
        return b''
835
836
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Fetch the browser's "Safe Storage" password from the selected Linux keyring.

    @param browser_keyring_name  name used by the browser for its keyring entries
    @param keyring               name of a _LinuxKeyring member, or a falsy value to auto-detect
    @param logger                logger used for debug/error output
    @returns                     the password bytes from the chosen backend, or None when the
                                 basic text store is used (no password is needed in that case)
    @raises AssertionError       if the resolved keyring is not one of the handled backends
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    # Raised explicitly instead of `assert False`: assert statements are removed under
    # `python -O`, which would make this fall through and return None (i.e. "basic text").
    raise AssertionError(f'Unknown keyring {keyring}')
855
856
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the browser's "Safe Storage" password via the `security` command line tool.

    @returns  the password bytes (with one trailing newline removed), or None if the
              command could not be run or exited with a non-zero status
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        proc = Popen(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            # without this check a failed lookup would hand back b'' as if it were the password
            logger.warning(f'find-generic-password failed with return code {proc.returncode}')
            return None
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
874
875
def _get_windows_v10_key(browser_root, logger):
    """
    Load the DPAPI-protected key from the most recently used "Local State" file.

    Returns the result of the DPAPI decryption, or None if the file or key is
    missing, or the stored key does not start with the b'DPAPI' marker.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, 'r', encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    key_blob = compat_b64decode(encoded_key)
    dpapi_marker = b'DPAPI'
    if key_blob.startswith(dpapi_marker):
        # the marker is not part of the DPAPI payload, so drop it before decrypting
        return _decrypt_windows_dpapi(key_blob[len(dpapi_marker):], logger)

    logger.error('invalid key')
    return None
895
896
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of key_length bytes from password and salt using PBKDF2-HMAC-SHA1."""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
899
900
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt ciphertext with AES-CBC, strip the PKCS#7 padding and decode it as UTF-8.

    Returns the decoded text, or None (after a one-time warning) if the
    decrypted bytes are not valid UTF-8.
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    decrypted = unpad_pkcs7(padded)
    try:
        return decrypted.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
908
909
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and verify ciphertext with AES-GCM, then decode it as UTF-8.

    Returns the decoded text, or None (after a one-time warning) if the
    decryption helper raises ValueError or the result is not valid UTF-8.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        text = decrypted.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
922
923
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt ciphertext with the Windows CryptUnprotectData API.

    Returns the decrypted bytes, or None (after a one-time warning) if the call fails.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before releasing the API-owned buffer
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
954
955
956def _config_home():
957 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
958
959
960def _open_database_copy(database_path, tmpdir):
961 # cannot open sqlite databases if they are already in use (e.g. by the browser)
962 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
963 shutil.copy(database_path, database_copy_path)
964 conn = sqlite3.connect(database_copy_path)
965 return conn.cursor()
966
967
968def _get_column_names(cursor, table_name):
969 table_info = cursor.execute('PRAGMA table_info({})'.format(table_name)).fetchall()
970 return [row[1].decode('utf-8') for row in table_info]
971
972
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk root and return the path of the file named filename with the newest mtime.

    Returns None when no file with that name exists below root.
    """
    # if there are multiple browser profiles, take the most recently used one
    matches = []
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry == filename:
                    matches.append(os.path.join(directory, entry))

    if not matches:
        return None
    return max(matches, key=lambda candidate: os.lstat(candidate).st_mtime)
984
985
def _merge_cookie_jars(jars):
    """
    Combine the cookies of all jars into a new YoutubeDLCookieJar.

    The merged jar takes its filename from the last input jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        if source_jar.filename is None:
            continue
        merged.filename = source_jar.filename
    return merged
994
995
996def _is_path(value):
997 return os.path.sep in value
998
999
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """
    Validate a browser specification and return it as (browser_name, profile, keyring).

    A profile that looks like a path has `~` expanded.
    Raises ValueError for an unsupported browser or keyring.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    looks_like_path = profile is not None and _is_path(profile)
    if looks_like_path:
        profile = os.path.expanduser(profile)
    return browser_name, profile, keyring