]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[youtube, cleanup] Minor refactoring
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
10from datetime import datetime, timedelta, timezone
f59f5ef8 11from enum import Enum, auto
982ee69a
MB
12from hashlib import pbkdf2_hmac
13
1d3586d0 14from .aes import (
15 aes_cbc_decrypt_bytes,
16 aes_gcm_decrypt_and_verify_bytes,
17 unpad_pkcs7,
18)
09906f55 19from .compat import (
982ee69a
MB
20 compat_b64decode,
21 compat_cookiejar_Cookie,
22)
09906f55 23from .utils import (
a44ca5a4 24 error_to_str,
982ee69a 25 expand_path,
d3c93ec2 26 Popen,
982ee69a
MB
27 YoutubeDLCookieJar,
28)
29
767b02a9
MB
30try:
31 import sqlite3
32 SQLITE_AVAILABLE = True
33except ImportError:
34 # although sqlite3 is part of the standard library, it is possible to compile python without
35 # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
36 SQLITE_AVAILABLE = False
37
38
982ee69a 39try:
f59f5ef8
MB
40 import secretstorage
41 SECRETSTORAGE_AVAILABLE = True
982ee69a 42except ImportError:
f59f5ef8
MB
43 SECRETSTORAGE_AVAILABLE = False
44 SECRETSTORAGE_UNAVAILABLE_REASON = (
45 'as the `secretstorage` module is not installed. '
46 'Please install by running `python3 -m pip install secretstorage`.')
063c409d 47except Exception as _err:
f59f5ef8
MB
48 SECRETSTORAGE_AVAILABLE = False
49 SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
982ee69a
MB
50
51
# Browser names that are routed to the Chromium cookie extractor.
CHROMIUM_BASED_BROWSERS = {
    'brave',
    'chrome',
    'chromium',
    'edge',
    'opera',
    'vivaldi',
}
# Every browser name for which an extractor exists in this module.
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS.union({'firefox', 'safari'})
54
55
class YDLLogger:
    """Thin logging facade over an optional YoutubeDL-like object.

    Every method silently does nothing when no (or a falsy) ``ydl`` was given.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Pass ``message`` to ``ydl.write_debug``."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Pass ``message``, prefixed with ``[Cookies]``, to ``ydl.to_screen``."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Pass ``message`` and ``only_once`` to ``ydl.report_warning``."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Pass ``message`` to ``ydl.report_error``."""
        if not self._ydl:
            return
        self._ydl.report_error(message)
75
76
def load_cookies(cookie_file, browser_specification, ydl):
    """Collect cookies from a browser and/or a cookie file and merge them.

    ``browser_specification`` (if not ``None``) is unpacked into
    ``_parse_browser_specification``; ``cookie_file`` (if not ``None``) is expanded
    and loaded only when it is readable. The jars are combined in that order by
    ``_merge_cookie_jars``.
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        cookie_file = expand_path(cookie_file)
        file_jar = YoutubeDLCookieJar(cookie_file)
        # an unreadable (e.g. not yet existing) file still yields an empty jar bound to that path
        readable = os.access(cookie_file, os.R_OK)
        if readable:
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
91
92
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch to the cookie extractor matching ``browser_name``.

    ``keyring`` is only forwarded to the Chromium extractor.
    Raises ``ValueError`` for a browser name with no extractor.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
102
103
def _extract_firefox_cookies(profile, logger):
    """Read all rows of ``moz_cookies`` from a Firefox profile into a cookie jar.

    ``profile`` may be ``None`` (search the whole Firefox directory), a path, or a
    name relative to the Firefox directory. Returns an empty jar when sqlite3 is
    unavailable; raises ``FileNotFoundError`` when no ``cookies.sqlite`` is found.
    """
    logger.info('Extracting cookies from firefox')
    if not SQLITE_AVAILABLE:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    db_path = _find_most_recently_used_file(search_root, 'cookies.sqlite')
    if db_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{db_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # the query runs against a copy placed in the temporary directory
            cursor = _open_database_copy(db_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            for row in cursor.fetchall():
                host, name, value, path, expiry, is_secure = row
                jar.set_cookie(compat_cookiejar_Cookie(
                    version=0, name=name, value=value, port=None, port_specified=False,
                    domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                    path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                    comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
141
142
143def _firefox_browser_dir():
144 if sys.platform in ('linux', 'linux2'):
145 return os.path.expanduser('~/.mozilla/firefox')
146 elif sys.platform == 'win32':
147 return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles')
148 elif sys.platform == 'darwin':
149 return os.path.expanduser('~/Library/Application Support/Firefox')
150 else:
151 raise ValueError('unsupported platform: {}'.format(sys.platform))
152
153
def _get_chromium_based_browser_settings(browser_name):
    """Return directory, keyring name and profile support for a Chromium-based browser.

    The result is a dict with the keys ``browser_dir``, ``keyring_name`` and
    ``supports_profiles``. Raises ``KeyError`` for an unknown ``browser_name`` and
    ``ValueError`` on an unsupported platform.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        config = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]
        browser_dir = os.path.join(config, relative_dir)

    elif platform == 'win32':
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        # opera is the only entry rooted in the roaming application data directory
        base_dir, relative_dir = {
            'brave': (appdata_local, r'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (appdata_local, r'Google\Chrome\User Data'),
            'chromium': (appdata_local, r'Chromium\User Data'),
            'edge': (appdata_local, r'Microsoft\Edge\User Data'),
            'opera': (appdata_roaming, r'Opera Software\Opera Stable'),
            'vivaldi': (appdata_local, r'Vivaldi\User Data'),
        }[browser_name]
        browser_dir = os.path.join(base_dir, relative_dir)

    elif platform == 'darwin':
        appdata = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]
        browser_dir = os.path.join(appdata, relative_dir)

    else:
        raise ValueError(f'unsupported platform: {platform}')

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    is_mac = platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if is_mac else 'Chromium',
        'opera': 'Opera' if is_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if is_mac else 'Chrome',
    }[browser_name]

    browsers_without_profiles = {'opera'}
    supports_profiles = browser_name not in browsers_without_profiles

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': supports_profiles,
    }
211
212
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Load the cookies of a Chromium-based browser into a ``YoutubeDLCookieJar``.

    ``profile`` may be ``None`` (search the whole browser directory), a path, or a
    profile name relative to the browser directory. ``keyring`` is forwarded to the
    cookie decryptor. Cookies whose value cannot be decrypted are skipped and counted.

    Returns an empty jar when sqlite3 is unavailable; raises ``FileNotFoundError``
    when no ``Cookies`` database is found.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not SQLITE_AVAILABLE:
        logger.warning(
            f'Cannot extract cookies from {browser_name} without sqlite3 support. '
            'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)
    supports_profiles = config['supports_profiles']

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # the browser directory handed to the decryptor is re-derived from the given path
        config['browser_dir'] = os.path.dirname(profile) if supports_profiles else profile
    elif supports_profiles:
        search_root = os.path.join(config['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = config['browser_dir']

    db_path = _find_most_recently_used_file(search_root, 'Cookies')
    if db_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{db_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(db_path, tmpdir)
            # text columns are fetched as bytes and decoded explicitly below
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(
                'SELECT host_key, name, value, encrypted_value, path, '
                f'expires_utc, {secure_column} FROM cookies')

            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            for row in cursor.fetchall():
                raw_host, raw_name, raw_value, encrypted_value, raw_path, expires_utc, is_secure = row
                host_key, name, value, path = (
                    field.decode('utf-8') for field in (raw_host, raw_name, raw_value, raw_path))

                if not value and encrypted_value:
                    value = decryptor.decrypt(encrypted_value)
                    if value is None:
                        failed_cookies += 1
                        continue
                else:
                    unencrypted_cookies += 1

                jar.set_cookie(compat_cookiejar_Cookie(
                    version=0, name=name, value=value, port=None, port_specified=False,
                    domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
                    path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
                    comment=None, comment_url=None, rest={}))

            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')

            # copy so that the decryptor's own counters are not modified
            counts = decryptor.cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
286
287
class ChromeCookieDecryptor:
    """
    Interface for the per-platform Chromium cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
        - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
            - KeyStorageLinux::CreateService
    """

    def decrypt(self, encrypted_value):
        """Decrypt one ``encrypted_value``; subclasses must override this."""
        raise NotImplementedError

    @property
    def cookie_counts(self):
        """Per-version counters of the cookies seen; subclasses must override this."""
        raise NotImplementedError
320
982ee69a 321
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor for the current platform.

    ``keyring`` is only used on Linux and ``browser_root`` only on Windows.
    Raises ``NotImplementedError`` on any other platform than Linux, macOS or Windows.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError(
        f'Chrome cookie decryption is not supported on this platform: {platform}')
332
333
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts ``v10`` (fixed key) and ``v11`` (keyring key) cookie values on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._v10_key = self.derive_key(b'peanuts')
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        # without a keyring password no v11 key exists and v11 cookies cannot be decrypted
        self._v11_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    @property
    def cookie_counts(self):
        """Counters of the cookie versions passed to ``decrypt`` so far."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or ``None`` when it cannot be decrypted.

        The first three bytes of ``encrypted_value`` select the key.
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        self._cookie_counts['other'] += 1
        return None
370
371
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts ``v10`` cookie values on macOS using a key derived from the keychain password."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    @property
    def cookie_counts(self):
        """Counters of the cookie versions passed to ``decrypt`` so far."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted value for ``v10`` data, or the input unchanged otherwise.

        ``None`` is returned for ``v10`` data when no key is available.
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            # NOTE(review): this hands back the input object as-is, while the v10 path
            # returns a decoded str - confirm that callers cope with both
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
406
407
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts cookie values on Windows.

    ``v10`` values are AES-GCM encrypted with the key read from the browser's
    ``Local State``; everything else is handed to DPAPI directly.
    """

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @property
    def cookie_counts(self):
        """Counters of the cookie versions passed to ``decrypt`` so far."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted value as ``str``, or ``None`` when decryption fails.

        The first three bytes of ``encrypted_value`` select the scheme.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # layout of the payload: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        self._cookie_counts['other'] += 1
        # any other prefix means the data is DPAPI encrypted
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
        plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
        # NOTE(review): assumes the DPAPI helper signals failure by returning None
        # (the v10 key obtained through it is checked for None above) - confirm.
        # Without this guard a failed decryption raises AttributeError instead of
        # being reported to the caller as an undecryptable cookie.
        if plaintext is None:
            return None
        return plaintext.decode('utf-8')
447
448
449def _extract_safari_cookies(profile, logger):
450 if profile is not None:
451 logger.error('safari does not support profiles')
452 if sys.platform != 'darwin':
453 raise ValueError('unsupported platform: {}'.format(sys.platform))
454
455 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
456
457 if not os.path.isfile(cookies_path):
1f7db853
MP
458 logger.debug('Trying secondary cookie location')
459 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
460 if not os.path.isfile(cookies_path):
461 raise FileNotFoundError('could not find safari cookies database')
982ee69a
MB
462
463 with open(cookies_path, 'rb') as f:
464 cookies_data = f.read()
465
466 jar = parse_safari_cookies(cookies_data, logger=logger)
467 logger.info('Extracted {} cookies from safari'.format(len(jar)))
468 return jar
469
470
class ParserError(Exception):
    """Raised by ``DataParser`` for invalid reads, skips or unexpected bytes."""
473
474
class DataParser:
    """Sequential reader over a bytes buffer that tracks its position in ``cursor``."""

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor past them.

        Raises ``ParserError`` for a negative count or a read beyond the buffer.
        """
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, end = self.cursor, self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = end
        return self._data[start:end]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes; raise ``ParserError`` unless they match."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless ``big_endian``)."""
        (value,) = struct.unpack('>I' if big_endian else '<I', self.read_bytes(4))
        return value

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little endian unless ``big_endian``)."""
        (value,) = struct.unpack('>d' if big_endian else '<d', self.read_bytes(8))
        return value

    def read_cstring(self):
        """Read up to and including the next NUL byte; return the UTF-8 decoded text before it.

        Raises ``ParserError`` when the buffer ends before a NUL byte is found.
        """
        terminator = self._data.find(b'\x00', self.cursor)
        if terminator < 0:
            # everything up to the end of the buffer counts as consumed
            self.cursor = len(self._data)
            raise ParserError('reached end of input')
        raw = self._data[self.cursor:terminator]
        # the cursor moves past the terminator before decoding is attempted
        self.cursor = terminator + 1
        return raw.decode('utf-8')

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes, logging them at debug level; zero is a no-op."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor is at ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the buffer."""
        self.skip_to(len(self._data), description)
525
526
527def _mac_absolute_time_to_posix(timestamp):
528 return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
529
530
def _parse_safari_cookies_header(data, logger):
    """Parse the file header; return ``(page_sizes, offset_of_first_page)``.

    Raises ``ParserError`` when ``data`` does not start with the ``cook`` signature.
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
537
538
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of cookie records and add every parsed cookie to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    cookie_count = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(cookie_count)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    for offset in record_offsets:
        parser.skip_to(offset, 'space between records')
        # the record parser works on its own view; advance this parser by the size it reports
        consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
        parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
555
556
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of ``data`` and add it to ``jar``.

    Returns the record size read from the record itself. A record whose strings
    are not valid UTF-8 is skipped with a warning.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = (flags & 0x0001) != 0
    parser.skip(4, 'unknown record field 2')
    domain_offset, name_offset, path_offset, value_offset = (parser.read_uint() for _ in range(4))
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # the creation date is read to advance the parser; its value is not used
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        strings = []
        for offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
        domain, name, path, value = strings
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
597
598
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari ``Cookies.binarycookies`` file into ``jar``.

    A new ``YoutubeDLCookieJar`` is created when ``jar`` is ``None``.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
614
615
f59f5ef8
MB
616class _LinuxDesktopEnvironment(Enum):
617 """
618 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
619 DesktopEnvironment
620 """
621 OTHER = auto()
622 CINNAMON = auto()
623 GNOME = auto()
624 KDE = auto()
625 PANTHEON = auto()
626 UNITY = auto()
627 XFCE = auto()
982ee69a
MB
628
629
f59f5ef8
MB
630class _LinuxKeyring(Enum):
631 """
632 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
633 SelectedLinuxBackend
634 """
635 KWALLET = auto()
636 GNOMEKEYRING = auto()
637 BASICTEXT = auto()
638
639
640SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
641
642
def _get_linux_desktop_environment(env):
    """
    Determine the desktop environment from the environment mapping ``env``.

    ``XDG_CURRENT_DESKTOP`` (first ``:``-separated entry) takes precedence, then
    ``DESKTOP_SESSION``, then the GNOME/KDE session marker variables.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    desktop_session = env.get('DESKTOP_SESSION', None)
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    if xdg_current_desktop is not None:
        xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()

    if xdg_current_desktop == 'Unity':
        if desktop_session is not None and 'gnome-fallback' in desktop_session:
            return _LinuxDesktopEnvironment.GNOME
        return _LinuxDesktopEnvironment.UNITY

    by_xdg_name = {
        'GNOME': _LinuxDesktopEnvironment.GNOME,
        'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
        'KDE': _LinuxDesktopEnvironment.KDE,
        'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
        'XFCE': _LinuxDesktopEnvironment.XFCE,
    }
    if xdg_current_desktop in by_xdg_name:
        return by_xdg_name[xdg_current_desktop]

    if desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE
        return _LinuxDesktopEnvironment.OTHER

    # the session marker variables are only consulted when DESKTOP_SESSION is unset
    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    if 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
681
682
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend matching the detected desktop environment.

    KDE selects KWallet, an unrecognised environment selects basic text and every
    other environment selects the GNOME keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
697
698
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Queried through ``dbus-send``; ``'kdewallet'`` is returned when the call fails
    or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet',
    ]
    try:
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.decode('utf-8').strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
728
729
730def _get_kwallet_password(browser_keyring_name, logger):
731 logger.debug('using kwallet-query to obtain password from kwallet')
732
733 if shutil.which('kwallet-query') is None:
734 logger.error('kwallet-query command not found. KWallet and kwallet-query '
735 'must be installed to read from KWallet. kwallet-query should be'
736 'included in the kwallet package for your distribution')
737 return b''
738
739 network_wallet = _get_kwallet_network_wallet(logger)
740
741 try:
742 proc = Popen([
743 'kwallet-query',
744 '--read-password', '{} Safe Storage'.format(browser_keyring_name),
745 '--folder', '{} Keys'.format(browser_keyring_name),
746 network_wallet
747 ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
748
749 stdout, stderr = proc.communicate_or_kill()
750 if proc.returncode != 0:
751 logger.error('kwallet-query failed with return code {}. Please consult '
752 'the kwallet-query man page for details'.format(proc.returncode))
753 return b''
754 else:
755 if stdout.lower().startswith(b'failed to read'):
756 logger.debug('failed to read password from kwallet. Using empty string instead')
757 # this sometimes occurs in KDE because chrome does not check hasEntry and instead
758 # just tries to read the value (which kwallet returns "") whereas kwallet-query
759 # checks hasEntry. To verify this:
760 # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
761 # while starting chrome.
762 # this may be a bug as the intended behaviour is to generate a random password and store
763 # it, but that doesn't matter here.
764 return b''
765 else:
766 logger.debug('password found')
767 if stdout[-1:] == b'\n':
768 stdout = stdout[:-1]
769 return stdout
a44ca5a4 770 except Exception as e:
771 logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
f59f5ef8
MB
772 return b''
773
774
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Return the secret of the keyring item labelled ``<browser_keyring_name> Safe Storage``.

    ``b''`` is returned (and an error logged) when ``secretstorage`` is not
    available or no item in the default collection carries that label.
    """
    if not SECRETSTORAGE_AVAILABLE:
        logger.error(f'secretstorage not available {SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        logger.error('failed to read from keyring')
        return b''
791
792
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Return the keyring password for the browser, or ``None`` for the basic text backend.

    ``keyring`` is the name of a ``_LinuxKeyring`` member; when it is falsy the
    backend is chosen from the detected desktop environment.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {keyring}'
811
812
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password with ``security find-generic-password``.

    Returns the command's standard output with one trailing newline removed, or
    ``None`` when running the command raises.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        output, _ = proc.communicate_or_kill()
        return output[:-1] if output[-1:] == b'\n' else output
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
982ee69a
MB
830
831
def _get_windows_v10_key(browser_root, logger):
    """Load and decrypt the cookie encryption key stored in the browser's 'Local State' file.

    The most recently modified 'Local State' file below `browser_root` is parsed as JSON,
    the base64 value at os_crypt.encrypted_key is decoded, its b'DPAPI' prefix is removed
    and the remainder is handed to `_decrypt_windows_dpapi`.

    Returns the result of `_decrypt_windows_dpapi`, or None (after logging an error) when
    the file is missing, the key entry is absent, or the prefix does not match.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State')
    if local_state_path is None:
        logger.error('could not find local state file')
        return None

    with open(local_state_path, encoding='utf8') as local_state:
        state = json.load(local_state)

    try:
        encoded_key = state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    key_blob = compat_b64decode(encoded_key)
    dpapi_marker = b'DPAPI'
    if key_blob.startswith(dpapi_marker):
        return _decrypt_windows_dpapi(key_blob[len(dpapi_marker):], logger)

    logger.error('invalid key')
    return None
850
851
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive `key_length` bytes from `password` and `salt` using PBKDF2 with HMAC-SHA1."""
    return pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
854
855
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """Decrypt `ciphertext` with AES-CBC, strip the PKCS#7 padding and decode it as UTF-8.

    Returns the decoded string, or None (after a one-time warning) when the decrypted
    bytes are not valid UTF-8.
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    raw = unpad_pkcs7(padded)
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
863
864
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """Decrypt and authenticate `ciphertext` with AES-GCM, then decode it as UTF-8.

    Returns the decoded string, or None (after a one-time warning) when the helper raises
    ValueError (reported as a failed MAC check) or the plaintext is not valid UTF-8.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        text = decrypted.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
877
878
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling CryptUnprotectData from crypt32 through ctypes.

    Returns the decrypted bytes, or None (after a one-time warning) when the call
    reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob),  # pDataOut
    )
    if succeeded:
        # copy the output into Python bytes before releasing the output buffer
        plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
        ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
        return plaintext

    logger.warning('failed to decrypt with DPAPI', only_once=True)
    return None
909
910
911def _config_home():
912 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
913
914
915def _open_database_copy(database_path, tmpdir):
916 # cannot open sqlite databases if they are already in use (e.g. by the browser)
917 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
918 shutil.copy(database_path, database_copy_path)
919 conn = sqlite3.connect(database_copy_path)
920 return conn.cursor()
921
922
923def _get_column_names(cursor, table_name):
924 table_info = cursor.execute('PRAGMA table_info({})'.format(table_name)).fetchall()
925 return [row[1].decode('utf-8') for row in table_info]
926
927
928def _find_most_recently_used_file(root, filename):
929 # if there are multiple browser profiles, take the most recently used one
930 paths = []
931 for root, dirs, files in os.walk(root):
932 for file in files:
933 if file == filename:
934 paths.append(os.path.join(root, file))
935 return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
936
937
def _merge_cookie_jars(jars):
    """Combine several cookie jars into one new YoutubeDLCookieJar.

    Cookies are added in iteration order; the merged jar takes the filename of the
    last input jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source in jars:
        for cookie in source:
            merged.set_cookie(cookie)
        if source.filename is None:
            continue
        merged.filename = source.filename
    return merged
946
947
948def _is_path(value):
949 return os.path.sep in value
950
951
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """Validate a browser specification and return it as (browser_name, profile, keyring).

    Raises ValueError when the browser is not in SUPPORTED_BROWSERS or when a keyring is
    given that is not in SUPPORTED_KEYRINGS. A profile that looks like a path (see
    `_is_path`) gets a leading '~' expanded; any other profile is returned unchanged.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    needs_expansion = profile is not None and _is_path(profile)
    resolved_profile = os.path.expanduser(profile) if needs_expansion else profile
    return browser_name, resolved_profile, keyring