]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[extractor] Document netrc machines
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
10from datetime import datetime, timedelta, timezone
f59f5ef8 11from enum import Enum, auto
982ee69a
MB
12from hashlib import pbkdf2_hmac
13
1d3586d0 14from .aes import (
15 aes_cbc_decrypt_bytes,
16 aes_gcm_decrypt_and_verify_bytes,
17 unpad_pkcs7,
18)
f8271158 19from .compat import compat_b64decode, compat_cookiejar_Cookie
9b8ee23b 20from .dependencies import (
21 _SECRETSTORAGE_UNAVAILABLE_REASON,
22 secretstorage,
23 sqlite3,
24)
97ec5bc5 25from .minicurses import MultilinePrinter, QuietMultilinePrinter
f8271158 26from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
982ee69a 27
982ee69a
MB
# Browser names that share the Chromium cookie storage handling in this module
CHROMIUM_BASED_BROWSERS = {
    'brave',
    'chrome',
    'chromium',
    'edge',
    'opera',
    'vivaldi',
}
# Every browser name accepted by extract_cookies_from_browser
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS.union({'firefox', 'safari'})
30
31
class YDLLogger:
    """Logging adapter that forwards messages to a YoutubeDL-like object.

    When no ``ydl`` object is supplied every logging method does nothing and
    ``progress_bar`` returns ``None``.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Pass ``message`` to ``ydl.write_debug``."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Pass ``message`` (prefixed with ``[Cookies]``) to ``ydl.to_screen``."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Pass ``message`` and ``only_once`` to ``ydl.report_warning``."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Pass ``message`` to ``ydl.report_error``."""
        if not self._ydl:
            return
        self._ydl.report_error(message)

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        ydl = self._ydl
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not ydl:
            return None
        if ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None

        stream = ydl._out_files['error']
        try:
            not_a_terminal = not stream.isatty()
        except BaseException:
            # Any failure while probing the stream disables the progress bar
            return None
        if not_a_terminal:
            return None

        printer = MultilinePrinter(stream, preserve_output=False)
        printer.print = lambda message: printer.print_at_line(f'[Cookies] {message}', 0)
        return printer
67
68
69def _create_progress_bar(logger):
70 if hasattr(logger, 'progress_bar'):
71 printer = logger.progress_bar()
72 if printer:
73 return printer
74 printer = QuietMultilinePrinter()
75 printer.print = lambda _: None
76 return printer
77
982ee69a
MB
78
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser specification and/or a cookie file.

    @param cookie_file            value handed to YoutubeDLCookieJar, or None to skip
    @param browser_specification  arguments for _parse_browser_specification, or None to skip
    @param ydl                    object wrapped in a YDLLogger for browser extraction
    @returns                      the result of merging all collected jars
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        is_filename = YoutubeDLCookieJar.is_path(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # Paths are only loaded when readable; anything else is always loaded
        should_load = os.access(cookie_file, os.R_OK) if is_filename else True
        if should_load:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
96
97
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch cookie extraction to the handler for ``browser_name``.

    ``keyring`` is only forwarded to the Chromium-based handler.

    @raises ValueError  if ``browser_name`` is not a known browser
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
982ee69a
MB
107
108
def _extract_firefox_cookies(profile, logger):
    """Read all rows of ``moz_cookies`` from a firefox database into a cookie jar.

    @param profile  None (search the firefox directory), a path, or a name
                    that is joined onto the firefox directory
    @returns        a YoutubeDLCookieJar; it is empty when sqlite3 is unavailable
    @raises FileNotFoundError  if no ``cookies.sqlite`` is found under the search root
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # The query runs against a copy of the database placed in tmpdir
            cursor = _open_database_copy(database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total_cookie_count = len(rows)
                for i, row in enumerate(rows):
                    host, name, value, path, expiry, is_secure = row
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    jar.set_cookie(compat_cookiejar_Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
150
151
152def _firefox_browser_dir():
153 if sys.platform in ('linux', 'linux2'):
154 return os.path.expanduser('~/.mozilla/firefox')
155 elif sys.platform == 'win32':
19a03940 156 return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
982ee69a
MB
157 elif sys.platform == 'darwin':
158 return os.path.expanduser('~/Library/Application Support/Firefox')
159 else:
86e5f3ed 160 raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a
MB
161
162
163def _get_chromium_based_browser_settings(browser_name):
164 # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
165 if sys.platform in ('linux', 'linux2'):
166 config = _config_home()
167 browser_dir = {
168 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
169 'chrome': os.path.join(config, 'google-chrome'),
170 'chromium': os.path.join(config, 'chromium'),
171 'edge': os.path.join(config, 'microsoft-edge'),
172 'opera': os.path.join(config, 'opera'),
173 'vivaldi': os.path.join(config, 'vivaldi'),
174 }[browser_name]
175
176 elif sys.platform == 'win32':
177 appdata_local = os.path.expandvars('%LOCALAPPDATA%')
178 appdata_roaming = os.path.expandvars('%APPDATA%')
179 browser_dir = {
19a03940 180 'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
181 'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'),
182 'chromium': os.path.join(appdata_local, R'Chromium\User Data'),
183 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
184 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
185 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
982ee69a
MB
186 }[browser_name]
187
188 elif sys.platform == 'darwin':
189 appdata = os.path.expanduser('~/Library/Application Support')
190 browser_dir = {
191 'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'),
192 'chrome': os.path.join(appdata, 'Google/Chrome'),
193 'chromium': os.path.join(appdata, 'Chromium'),
194 'edge': os.path.join(appdata, 'Microsoft Edge'),
195 'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
196 'vivaldi': os.path.join(appdata, 'Vivaldi'),
197 }[browser_name]
198
199 else:
86e5f3ed 200 raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a
MB
201
202 # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
203 # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
204 keyring_name = {
205 'brave': 'Brave',
206 'chrome': 'Chrome',
207 'chromium': 'Chromium',
29b208f6 208 'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
982ee69a
MB
209 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
210 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
211 }[browser_name]
212
213 browsers_without_profiles = {'opera'}
214
215 return {
216 'browser_dir': browser_dir,
217 'keyring_name': keyring_name,
218 'supports_profiles': browser_name not in browsers_without_profiles
219 }
220
221
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt the ``cookies`` table of a Chromium-based browser.

    @param profile  None (search the browser directory), a path, or a profile name
    @param keyring  forwarded to get_cookie_decryptor
    @returns        a YoutubeDLCookieJar; it is empty when sqlite3 is unavailable
    @raises FileNotFoundError  if no ``Cookies`` file is found under the search root
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # The browser directory is later handed to get_cookie_decryptor
        if config['supports_profiles']:
            config['browser_dir'] = os.path.dirname(profile)
        else:
            config['browser_dir'] = profile
    elif config['supports_profiles']:
        search_root = os.path.join(config['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = config['browser_dir']

    database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            # Text columns are fetched as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            # The secure flag column is named either 'is_secure' or 'secure'
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total_cookie_count = len(rows)
                for i, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)

            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor.cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
286
287
97ec5bc5 288def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
0f06bcd7 289 host_key = host_key.decode()
290 name = name.decode()
291 value = value.decode()
292 path = path.decode()
97ec5bc5 293 is_encrypted = not value and encrypted_value
294
295 if is_encrypted:
296 value = decryptor.decrypt(encrypted_value)
297 if value is None:
298 return is_encrypted, None
299
300 return is_encrypted, compat_cookiejar_Cookie(
301 version=0, name=name, value=value, port=None, port_specified=False,
302 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
303 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
304 comment=None, comment_url=None, rest={})
305
306
982ee69a
MB
class ChromeCookieDecryptor:
    """
    Interface of the per-platform cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    def decrypt(self, encrypted_value):
        """Decrypt one encrypted cookie value; sub classes provide the implementation."""
        raise NotImplementedError('Must be implemented by sub classes')

    @property
    def cookie_counts(self):
        """Counters of the cookie versions seen; sub classes provide the implementation."""
        raise NotImplementedError('Must be implemented by sub classes')
f59f5ef8 339
982ee69a 340
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor matching the current ``sys.platform``.

    ``keyring`` is only used on linux and ``browser_root`` only on win32.

    @raises NotImplementedError  on any platform other than linux, darwin and win32
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}')
982ee69a
MB
350
351
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the 'v10' and 'v11' prefixed cookie values on linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 uses a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        # v11 uses a key derived from the keyring password, when one is available
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    @property
    def cookie_counts(self):
        """Number of values seen so far, keyed by 'v10', 'v11' and 'other'."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt ``encrypted_value`` based on its 3 byte version prefix.

        Returns None for an unknown prefix and for v11 values when no v11 key
        is available.
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version != b'v11':
            self._cookie_counts['other'] += 1
            return None

        self._cookie_counts['v11'] += 1
        if self._v11_key is None:
            self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
388
389
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for cookie values on mac; non-v10 values are returned unchanged."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    @property
    def cookie_counts(self):
        """Number of values seen so far, keyed by 'v10' and 'other'."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt a v10 value, or hand back any other value as-is.

        Returns None for v10 values when no key is available.
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
424
425
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for cookie values on windows (v10: AES-GCM, otherwise DPAPI)."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @property
    def cookie_counts(self):
        """Number of values seen so far, keyed by 'v10' and 'other'."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt ``encrypted_value`` based on its 3 byte version prefix.

        Returns None when the value cannot be decrypted: a v10 value without
        a key, or a DPAPI result of None.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # layout after the prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): the DPAPI helper is defined outside this block and
            # presumably yields None on failure; calling .decode() on that would
            # raise AttributeError, so report a failed cookie instead
            if plaintext is None:
                return None
            return plaintext.decode()
982ee69a
MB
465
466
467def _extract_safari_cookies(profile, logger):
468 if profile is not None:
469 logger.error('safari does not support profiles')
470 if sys.platform != 'darwin':
86e5f3ed 471 raise ValueError(f'unsupported platform: {sys.platform}')
982ee69a
MB
472
473 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
474
475 if not os.path.isfile(cookies_path):
1f7db853
MP
476 logger.debug('Trying secondary cookie location')
477 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
478 if not os.path.isfile(cookies_path):
479 raise FileNotFoundError('could not find safari cookies database')
982ee69a
MB
480
481 with open(cookies_path, 'rb') as f:
482 cookies_data = f.read()
483
484 jar = parse_safari_cookies(cookies_data, logger=logger)
86e5f3ed 485 logger.info(f'Extracted {len(jar)} cookies from safari')
982ee69a
MB
486 return jar
487
488
class ParserError(Exception):
    """Raised by DataParser for invalid reads, skips and unexpected values."""
491
492
class DataParser:
    """Sequential reader over a bytes object.

    ``cursor`` is the offset of the next unread byte; every read advances it.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor.

        Raises ParserError for a negative count or a read past the end.
        """
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start = self.cursor
        stop = start + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and raise ParserError on a mismatch."""
        value = self.read_bytes(len(expected_value))
        if value == expected_value:
            return
        raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless ``big_endian``)."""
        (number,) = struct.unpack('>I' if big_endian else '<I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8 byte double (little endian unless ``big_endian``)."""
        (number,) = struct.unpack('>d' if big_endian else '<d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read bytes up to and including a NUL byte; return the decoded text before it."""
        pieces = []
        byte = self.read_bytes(1)
        while byte != b'\x00':
            pieces.append(byte)
            byte = self.read_bytes(1)
        return b''.join(pieces).decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes and log them at debug level.

        A count of 0 does nothing; a negative count raises ParserError.
        """
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor ends up at ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left of the data."""
        self.skip_to(len(self._data), description)
542
543
544def _mac_absolute_time_to_posix(timestamp):
545 return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
546
547
def _parse_safari_cookies_header(data, logger):
    """Parse the file header: the b'cook' signature, page count and page sizes.

    @returns  (list of page sizes, offset of the first byte after the header)
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    number_of_pages = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(number_of_pages):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
554
555
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of cookie records and add the cookies to ``jar``.

    The page starts with a signature, the number of cookies and one record
    offset per cookie. A page without cookies is only logged.
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not number_of_cookies:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for i, record_offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(record_offset, 'space between records')
            # The record parser gets the rest of the page and reports the record's size,
            # which is then consumed here to keep the page cursor in step
            record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
            parser.read_bytes(record_length)
    parser.skip_to_end('space in between pages')
574
575
def _parse_safari_cookies_record(data, jar, logger):
    """Parse one cookie record from the start of ``data`` and add it to ``jar``.

    A record whose strings fail to decode is skipped with a warning.

    @returns  the record size read from the first field of the record
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        # The four strings are read in this fixed order: domain, name, path, value
        strings = []
        for string_offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(string_offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
616
617
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the bytes of a safari cookies file into ``jar`` (a new jar when None).

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    target_jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for page_size in page_sizes:
        page = body.read_bytes(page_size)
        _parse_safari_cookies_page(page, target_jar, logger)
    body.skip_to_end('footer')
    return target_jar
633
634
f59f5ef8
MB
635class _LinuxDesktopEnvironment(Enum):
636 """
637 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
638 DesktopEnvironment
639 """
640 OTHER = auto()
641 CINNAMON = auto()
642 GNOME = auto()
643 KDE = auto()
644 PANTHEON = auto()
645 UNITY = auto()
646 XFCE = auto()
982ee69a
MB
647
648
f59f5ef8
MB
649class _LinuxKeyring(Enum):
650 """
651 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
652 SelectedLinuxBackend
653 """
654 KWALLET = auto()
655 GNOMEKEYRING = auto()
656 BASICTEXT = auto()
657
658
# Names of all _LinuxKeyring members
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
660
661
def _get_linux_desktop_environment(env):
    """
    Detect the desktop environment from a mapping of environment variables.

    Modelled on
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment

    Checks are made in this order, each one falling through to the next when
    nothing is recognised:
        1. every colon separated entry of XDG_CURRENT_DESKTOP
        2. DESKTOP_SESSION
        3. the presence of GNOME_DESKTOP_SESSION_ID / KDE_FULL_SESSION

    @param env  mapping such as os.environ
    @returns    a _LinuxDesktopEnvironment member (OTHER when nothing matched)
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        # The variable may hold several values separated by colons (for example
        # 'ubuntu:GNOME'), so every entry is checked rather than only the first
        for part in map(str.strip, xdg_current_desktop.split(':')):
            if part == 'Unity':
                if desktop_session is not None and 'gnome-fallback' in desktop_session:
                    return _LinuxDesktopEnvironment.GNOME
                return _LinuxDesktopEnvironment.UNITY
            elif part == 'GNOME':
                return _LinuxDesktopEnvironment.GNOME
            elif part == 'X-Cinnamon':
                return _LinuxDesktopEnvironment.CINNAMON
            elif part == 'KDE':
                return _LinuxDesktopEnvironment.KDE
            elif part == 'Pantheon':
                return _LinuxDesktopEnvironment.PANTHEON
            elif part == 'XFCE':
                return _LinuxDesktopEnvironment.XFCE

    if desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        elif 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    if 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
700
701
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend for the desktop environment detected from os.environ.

    KDE selects KWALLET, OTHER selects BASICTEXT and everything else GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
716
717
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Falls back to 'kdewallet' when the dbus-send call fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    fallback_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet'
    ]
    try:
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return fallback_wallet

        network_wallet = stdout.decode().strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return fallback_wallet
747
748
749def _get_kwallet_password(browser_keyring_name, logger):
750 logger.debug('using kwallet-query to obtain password from kwallet')
751
752 if shutil.which('kwallet-query') is None:
753 logger.error('kwallet-query command not found. KWallet and kwallet-query '
754 'must be installed to read from KWallet. kwallet-query should be'
755 'included in the kwallet package for your distribution')
756 return b''
757
758 network_wallet = _get_kwallet_network_wallet(logger)
759
760 try:
761 proc = Popen([
762 'kwallet-query',
86e5f3ed 763 '--read-password', f'{browser_keyring_name} Safe Storage',
764 '--folder', f'{browser_keyring_name} Keys',
f59f5ef8
MB
765 network_wallet
766 ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
767
768 stdout, stderr = proc.communicate_or_kill()
769 if proc.returncode != 0:
19a03940 770 logger.error(f'kwallet-query failed with return code {proc.returncode}. Please consult '
771 'the kwallet-query man page for details')
f59f5ef8
MB
772 return b''
773 else:
774 if stdout.lower().startswith(b'failed to read'):
775 logger.debug('failed to read password from kwallet. Using empty string instead')
776 # this sometimes occurs in KDE because chrome does not check hasEntry and instead
777 # just tries to read the value (which kwallet returns "") whereas kwallet-query
778 # checks hasEntry. To verify this:
779 # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
780 # while starting chrome.
781 # this may be a bug as the intended behaviour is to generate a random password and store
782 # it, but that doesn't matter here.
783 return b''
784 else:
785 logger.debug('password found')
786 if stdout[-1:] == b'\n':
787 stdout = stdout[:-1]
788 return stdout
a44ca5a4 789 except Exception as e:
790 logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
f59f5ef8
MB
791 return b''
792
793
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Return the secret of the item labelled '<browser_keyring_name> Safe Storage'.

    @returns  the secret, or b'' when secretstorage is unavailable or no item
              of the default collection carries that label
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    wanted_label = f'{browser_keyring_name} Safe Storage'
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        # Reached only when no item matched
        logger.error('failed to read from keyring')
        return b''
810
811
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Fetch the keyring password from the named or automatically chosen keyring.

    @param keyring  name of a _LinuxKeyring member; a falsy value triggers
                    automatic selection
    @returns        the password, or None when the BASICTEXT keyring is used
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {keyring}'
830
831
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the "<browser_keyring_name> Safe Storage" password using the `security` command

    Returns the command's stdout with one trailing newline removed,
    or None if running the command raised an exception (a warning is logged)
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        password, _ = proc.communicate_or_kill()
        if password.endswith(b'\n'):
            password = password[:-1]
        return password
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
849
850
def _get_windows_v10_key(browser_root, logger):
    """
    Extract and decrypt the cookie encryption key stored in the browser's "Local State" file

    The key is read from os_crypt.encrypted_key, base64-decoded, checked for the
    b'DPAPI' prefix and the remainder is passed to _decrypt_windows_dpapi.
    Returns None (after logging an error) if the file, the key or the prefix is missing
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    dpapi_marker = b'DPAPI'
    wrapped_key = compat_b64decode(encoded_key)
    if wrapped_key.startswith(dpapi_marker):
        # strip the marker; only the remainder is the DPAPI-protected blob
        return _decrypt_windows_dpapi(wrapped_key[len(dpapi_marker):], logger)

    logger.error('invalid key')
    return None
870
871
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of key_length bytes from password and salt using PBKDF2-HMAC-SHA1"""
    return pbkdf2_hmac(
        hash_name='sha1',
        password=password,
        salt=salt,
        iterations=iterations,
        dklen=key_length,
    )
874
875
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    AES-CBC decrypt ciphertext, strip the PKCS#7 padding and decode the result

    Returns the decoded string, or None (after a one-time warning) if the
    decrypted bytes cannot be decoded
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    decrypted = unpad_pkcs7(padded)
    try:
        return decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
883
884
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    AES-GCM decrypt and verify ciphertext, then decode the result

    Returns the decoded string, or None (after a one-time warning) if either
    the verification or the decoding fails
    """
    # NOTE: the two steps must stay in separate try blocks: UnicodeDecodeError is a
    # subclass of ValueError, so merging them would report the wrong failure
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        decoded = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return decoded
897
898
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt ciphertext by calling CryptUnprotectData from crypt32 (Windows only)

    Returns the decrypted bytes, or None (after a one-time warning) if the call fails

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output out before handing the buffer back with LocalFree
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
929
930
def _config_home():
    """
    Return the user's configuration directory

    Uses $XDG_CONFIG_HOME when it is set to a non-empty value and falls back to
    ~/.config otherwise. The XDG Base Directory Specification requires the
    default to be used when the variable is "either not set or empty", so an
    empty value must not be returned verbatim.
    """
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
933
934
def _open_database_copy(database_path, tmpdir):
    """
    Copy the database to "temporary.sqlite" inside tmpdir and return a cursor on the copy

    A copy is used since sqlite databases cannot be opened
    if they are already in use (e.g. by the browser)
    """
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
941
942
def _get_column_names(cursor, table_name):
    """
    Return the column names of table_name as a list of str

    @param cursor      sqlite3 cursor of the database containing the table
    @param table_name  name of the table; interpolated into the PRAGMA statement
                       as-is, so it must be a trusted identifier
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # the second field of each row is the column name. It is bytes when the connection's
    # text_factory is bytes and already str with the default text_factory,
    # so only decode when needed
    return [
        row[1].decode() if isinstance(row[1], bytes) else row[1]
        for row in table_info
    ]
982ee69a
MB
946
947
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk root and return the path of the most recently modified file named filename

    Returns None if there is no such file. The number of files
    searched so far is reported through the progress bar
    """
    # if there are multiple browser profiles, take the most recently used one
    matches = []
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, names in os.walk(root):
            for name in names:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if name == filename:
                    matches.append(os.path.join(directory, name))

    if not matches:
        return None
    return max(matches, key=lambda candidate: os.lstat(candidate).st_mtime)
959
960
def _merge_cookie_jars(jars):
    """
    Combine the cookies of all jars into a new YoutubeDLCookieJar

    The returned jar takes the filename of the last jar whose filename is not None
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for entry in source_jar:
            merged.set_cookie(entry)
        source_filename = source_jar.filename
        if source_filename is not None:
            merged.filename = source_filename
    return merged
969
970
def _is_path(value):
    """
    Return True if value contains a path separator, i.e. looks like a path rather than a bare name

    Both os.path.sep and os.path.altsep are checked, so that on Windows
    a path written with forward slashes is also recognised.
    os.path.altsep is None on platforms with a single separator and is skipped.
    """
    return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
973
974
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """
    Validate a browser specification and expand "~" in the profile if it is a path

    @returns  the tuple (browser_name, profile, keyring)
    @raises   ValueError if browser_name or keyring is not supported
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    # a profile without a path separator is a profile name and is left untouched
    profile_is_path = profile is not None and _is_path(profile)
    if profile_is_path:
        profile = os.path.expanduser(profile)
    return browser_name, profile, keyring