]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[outtmpl] Handle hard-coded file extension better
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
10from datetime import datetime, timedelta, timezone
f59f5ef8 11from enum import Enum, auto
982ee69a
MB
12from hashlib import pbkdf2_hmac
13
1d3586d0 14from .aes import (
15 aes_cbc_decrypt_bytes,
16 aes_gcm_decrypt_and_verify_bytes,
17 unpad_pkcs7,
18)
09906f55 19from .compat import (
982ee69a
MB
20 compat_b64decode,
21 compat_cookiejar_Cookie,
22)
09906f55 23from .utils import (
982ee69a 24 expand_path,
d3c93ec2 25 Popen,
982ee69a
MB
26 YoutubeDLCookieJar,
27)
28
767b02a9
MB
29try:
30 import sqlite3
31 SQLITE_AVAILABLE = True
32except ImportError:
33 # although sqlite3 is part of the standard library, it is possible to compile python without
34 # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
35 SQLITE_AVAILABLE = False
36
37
982ee69a 38try:
f59f5ef8
MB
39 import secretstorage
40 SECRETSTORAGE_AVAILABLE = True
982ee69a 41except ImportError:
f59f5ef8
MB
42 SECRETSTORAGE_AVAILABLE = False
43 SECRETSTORAGE_UNAVAILABLE_REASON = (
44 'as the `secretstorage` module is not installed. '
45 'Please install by running `python3 -m pip install secretstorage`.')
063c409d 46except Exception as _err:
f59f5ef8
MB
47 SECRETSTORAGE_AVAILABLE = False
48 SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
982ee69a
MB
49
50
51CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
52SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
53
54
class YDLLogger:
    """Logging adapter that forwards messages to a YoutubeDL-like object.

    When no ``ydl`` object is supplied (or it is falsy) every method is a
    silent no-op, so callers never have to check whether logging is available.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward ``message`` to ``ydl.write_debug``."""
        ydl = self._ydl
        if not ydl:
            return
        ydl.write_debug(message)

    def info(self, message):
        """Forward ``message`` to ``ydl.to_screen`` with a ``[Cookies]`` prefix."""
        ydl = self._ydl
        if not ydl:
            return
        ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward ``message`` and the ``only_once`` flag to ``ydl.report_warning``."""
        ydl = self._ydl
        if not ydl:
            return
        ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward ``message`` to ``ydl.report_error``."""
        ydl = self._ydl
        if not ydl:
            return
        ydl.report_error(message)
74
75
def load_cookies(cookie_file, browser_specification, ydl):
    """Collect cookies from a browser and/or a cookie file into one jar.

    @param cookie_file            path of a cookie file, or None to skip it
    @param browser_specification  arguments for `_parse_browser_specification`,
                                  or None to skip browser extraction
    @param ydl                    object wrapped in a `YDLLogger` for messages
    @returns                      the result of `_merge_cookie_jars` over the
                                  jars gathered (browser jar first, file jar second)
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        cookie_file = expand_path(cookie_file)
        file_jar = YoutubeDLCookieJar(cookie_file)
        # The jar is added even when the file is unreadable; it is only
        # populated when the file can actually be read.
        if os.access(cookie_file, os.R_OK):
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
90
91
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch cookie extraction to the handler for ``browser_name``.

    ``keyring`` is only forwarded to the Chromium-based extractor.

    @raises ValueError  if ``browser_name`` is not firefox, safari or one of
                        CHROMIUM_BASED_BROWSERS
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
101
102
def _extract_firefox_cookies(profile, logger):
    """Read all cookies from a Firefox ``cookies.sqlite`` database.

    @param profile  None (search the whole Firefox directory), a path, or a
                    profile name relative to the Firefox directory
    @returns        a YoutubeDLCookieJar; empty when sqlite3 is unavailable
    @raises FileNotFoundError  if no ``cookies.sqlite`` is found
    """
    logger.info('Extracting cookies from firefox')
    if not SQLITE_AVAILABLE:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite')
    if database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # The query runs against a copy of the database made in tmpdir.
            cursor = _open_database_copy(database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            for row in cursor.fetchall():
                host, name, value, path, expiry, is_secure = row
                jar.set_cookie(compat_cookiejar_Cookie(
                    version=0, name=name, value=value, port=None, port_specified=False,
                    domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                    path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                    comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
140
141
def _firefox_browser_dir():
    """Return the platform-specific directory that holds Firefox profiles.

    @raises ValueError  on any platform other than linux, win32 or darwin
    """
    platform = sys.platform
    if platform == 'win32':
        return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles')
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    if platform in ('linux', 'linux2'):
        return os.path.expanduser('~/.mozilla/firefox')
    raise ValueError(f'unsupported platform: {platform}')
151
152
def _get_chromium_based_browser_settings(browser_name):
    """Return where a Chromium-based browser keeps its data and keyring entry.

    @returns  dict with keys ``browser_dir`` (user data directory for the
              current platform), ``keyring_name`` (name used for the keyring
              entry) and ``supports_profiles`` (False only for opera)
    @raises ValueError  on an unsupported platform
    @raises KeyError    if ``browser_name`` is not a known browser
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        base_dir = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]

    elif platform == 'win32':
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        # opera is the only browser here that lives under the roaming profile
        base_dir, relative_dir = {
            'brave': (local, r'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, r'Google\Chrome\User Data'),
            'chromium': (local, r'Chromium\User Data'),
            'edge': (local, r'Microsoft\Edge\User Data'),
            'opera': (roaming, r'Opera Software\Opera Stable'),
            'vivaldi': (local, r'Vivaldi\User Data'),
        }[browser_name]

    elif platform == 'darwin':
        base_dir = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]

    else:
        raise ValueError(f'unsupported platform: {platform}')

    browser_dir = os.path.join(base_dir, relative_dir)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }[browser_name]

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in {'opera'},
    }
210
211
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read the cookies of a Chromium-based browser, decrypting where needed.

    @param browser_name  key understood by `_get_chromium_based_browser_settings`
    @param profile       None (search the whole browser directory), a path, or
                         a profile name relative to the browser directory
    @param keyring       forwarded to `get_cookie_decryptor`
    @param logger        object providing info/debug/warning/error
    @returns             a YoutubeDLCookieJar; empty when sqlite3 is unavailable
    @raises FileNotFoundError  if no ``Cookies`` database is found
    """
    logger.info('Extracting cookies from {}'.format(browser_name))

    if not SQLITE_AVAILABLE:
        logger.warning(('Cannot extract cookies from {} without sqlite3 support. '
                        'Please use a python interpreter compiled with sqlite3 support').format(browser_name))
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # browser_dir is later handed to the decryptor: for browsers with
        # profiles it is the parent of the profile path, otherwise the path itself
        config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
    else:
        if config['supports_profiles']:
            search_root = os.path.join(config['browser_dir'], profile)
        else:
            # a profile name was given for a browser without profiles:
            # report it and fall back to the browser directory
            logger.error('{} does not support profiles'.format(browser_name))
            search_root = config['browser_dir']

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies')
    if cookie_database_path is None:
        raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root))
    logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path))

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # text columns come back as bytes and are decoded explicitly below
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            # the "secure" flag column is called either `is_secure` or `secure`
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute('SELECT host_key, name, value, encrypted_value, path, '
                           'expires_utc, {} FROM cookies'.format(secure_column))
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall():
                host_key = host_key.decode('utf-8')
                name = name.decode('utf-8')
                value = value.decode('utf-8')
                path = path.decode('utf-8')

                # only decrypt when there is no plain value but an encrypted one
                if not value and encrypted_value:
                    value = decryptor.decrypt(encrypted_value)
                    if value is None:
                        # decryption failed: count the cookie and leave it out of the jar
                        failed_cookies += 1
                        continue
                else:
                    unencrypted_cookies += 1

                cookie = compat_cookiejar_Cookie(
                    version=0, name=name, value=value, port=None, port_specified=False,
                    domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
                    path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
                    comment=None, comment_url=None, rest={})
                jar.set_cookie(cookie)
            if failed_cookies > 0:
                failed_message = ' ({} could not be decrypted)'.format(failed_cookies)
            else:
                failed_message = ''
            logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message))
            # copy so that adding the 'unencrypted' entry does not modify the decryptor's own counters
            counts = decryptor.cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug('cookie version breakdown: {}'.format(counts))
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
285
286
class ChromeCookieDecryptor:
    """
    Base interface for the platform-specific Chrome cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value; must be overridden by subclasses."""
        raise NotImplementedError

    @property
    def cookie_counts(self):
        """Per-version counters of the cookies seen; must be overridden by subclasses."""
        raise NotImplementedError
319
982ee69a 320
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the ChromeCookieDecryptor implementation for the current platform.

    ``browser_root`` is only used on Windows, ``browser_keyring_name`` only on
    Linux and macOS, and ``keyring`` only on Linux.

    @raises NotImplementedError  on any other platform
    """
    platform = sys.platform
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    raise NotImplementedError(
        f'Chrome cookie decryption is not supported on this platform: {platform}')
331
332
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts Chrome cookies on Linux (``v10`` and ``v11`` prefixes)."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 cookies use a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        # v11 cookies use a key derived from the keyring password, if any
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    @property
    def cookie_counts(self):
        """Counters of the cookie versions passed to `decrypt` so far."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value, or None if it cannot be decrypted."""
        prefix, payload = encrypted_value[:3], encrypted_value[3:]

        if prefix == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(payload, self._v10_key, self._logger)

        if prefix == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is not None:
                return _decrypt_aes_cbc(payload, self._v11_key, self._logger)
            self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
            return None

        # unknown prefix: counted but not decrypted
        self._cookie_counts['other'] += 1
        return None
369
370
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts Chrome cookies on macOS (``v10`` prefix or plaintext)."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = self.derive_key(password) if password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    @property
    def cookie_counts(self):
        """Counters of the cookie versions passed to `decrypt` so far."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted value, the input itself for non-v10 data, or None."""
        if encrypted_value[:3] != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(encrypted_value[3:], self._v10_key, self._logger)
405
406
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts Chrome cookies on Windows (``v10`` AES-GCM or DPAPI)."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @property
    def cookie_counts(self):
        """Counters of the cookie versions passed to `decrypt` so far."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value; None when no v10 key is available."""
        if encrypted_value[:3] != b'v10':
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8')

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
        #   kNonceLength
        nonce_length = 96 // 8
        # boringssl
        #   EVP_AEAD_AES_GCM_TAG_LEN
        tag_length = 16

        # layout after the version prefix: nonce | ciphertext | authentication tag
        payload = encrypted_value[3:]
        nonce = payload[:nonce_length]
        body = payload[nonce_length:-tag_length]
        tag = payload[-tag_length:]

        return _decrypt_aes_gcm(body, self._v10_key, nonce, tag, self._logger)
446
447
def _extract_safari_cookies(profile, logger):
    """Read Safari's ``Cookies.binarycookies`` file into a cookie jar.

    A non-None ``profile`` is reported as an error but otherwise ignored.

    @raises ValueError         when not running on darwin
    @raises FileNotFoundError  when the cookies file does not exist
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    binary_cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    if not os.path.isfile(binary_cookies_path):
        raise FileNotFoundError('could not find safari cookies database')

    with open(binary_cookies_path, 'rb') as cookies_file:
        raw = cookies_file.read()

    jar = parse_safari_cookies(raw, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
465
466
class ParserError(Exception):
    """Raised by DataParser on an invalid read/skip or an unexpected value."""
    pass
469
470
class DataParser:
    """Sequential reader over a bytes buffer with a public ``cursor`` offset."""

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor.

        @raises ParserError  for a negative count or a read past the end
        """
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start = self.cursor
        stop = start + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and require them to match."""
        actual = self.read_bytes(len(expected_value))
        if actual != expected_value:
            raise ParserError(f'unexpected value: {actual} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless ``big_endian``)."""
        (number,) = struct.unpack('>I' if big_endian else '<I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8-byte double (little-endian unless ``big_endian``)."""
        (number,) = struct.unpack('>d' if big_endian else '<d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read bytes up to and including the next NUL; return them decoded as UTF-8."""
        # iter() with a sentinel keeps reading single bytes until b'\x00' is read
        raw = b''.join(iter(lambda: self.read_bytes(1), b'\x00'))
        return raw.decode('utf-8')

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes, logging them at debug level.

        @raises ParserError  for a negative count
        """
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor is at ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that remains in the buffer."""
        self.skip_to(len(self._data), description)
521
522
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
525
526
def _parse_safari_cookies_header(data, logger):
    """Parse the file header of a Safari binary cookies database.

    @returns  (list of page sizes, offset at which the header ends)
    """
    header = DataParser(data, logger)
    header.expect_bytes(b'cook', 'database signature')
    page_count = header.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(header.read_uint(big_endian=True))
    return page_sizes, header.cursor
533
534
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a Safari cookies database, adding its cookies to ``jar``."""
    page = DataParser(data, logger)
    page.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    cookie_count = page.read_uint()
    offsets = [page.read_uint() for _ in range(cookie_count)]
    if cookie_count == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    page.skip_to(offsets[0], 'unknown page header field')

    for offset in offsets:
        page.skip_to(offset, 'space between records')
        # each record is parsed from its own slice; the page parser then
        # advances by the length the record reports
        consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
        page.read_bytes(consumed)
    page.skip_to_end('space in between pages')
551
552
def _parse_safari_cookies_record(data, jar, logger):
    """Parse a single cookie record and add the cookie to ``jar``.

    A record whose strings are not valid UTF-8 is skipped with a warning.

    @returns  the record size read from the record's first field
    """
    record = DataParser(data, logger)
    record_size = record.read_uint()
    record.skip(4, 'unknown record field 1')
    flags = record.read_uint()
    is_secure = bool(flags & 0x0001)
    record.skip(4, 'unknown record field 2')
    string_offsets = [record.read_uint() for _ in range(4)]  # domain, name, path, value
    record.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(record.read_double())
    # read (and converted) only to move past the field; the value is not used
    _creation_date = _mac_absolute_time_to_posix(record.read_double())  # noqa: F841

    try:
        strings = []
        for offset in string_offsets:
            record.skip_to(offset)
            strings.append(record.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    record.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
593
594
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari binary cookies file into ``jar``.

    A new YoutubeDLCookieJar is created when ``jar`` is None; the jar is returned.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    target = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page_data = body.read_bytes(size)
        _parse_safari_cookies_page(page_data, target, logger)
    body.skip_to_end('footer')
    return target
610
611
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished when choosing a keyring backend.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    OTHER = auto()  # returned by _get_linux_desktop_environment when nothing else matches
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
982ee69a
MB
624
625
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends from which the cookie encryption password can be read.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()  # no keyring password is looked up for this backend
634
635
636SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
637
638
def _get_linux_desktop_environment(env):
    """
    Guess the desktop environment from the environment mapping ``env``.

    XDG_CURRENT_DESKTOP (first ':'-separated entry) is checked first, then
    DESKTOP_SESSION; the GNOME/KDE session variables are consulted only when
    DESKTOP_SESSION is not set at all.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    session = env.get('DESKTOP_SESSION', None)
    current = env.get('XDG_CURRENT_DESKTOP', None)
    if current is not None:
        current = current.split(':')[0].strip()

    if current == 'Unity':
        is_gnome_fallback = session is not None and 'gnome-fallback' in session
        if is_gnome_fallback:
            return _LinuxDesktopEnvironment.GNOME
        return _LinuxDesktopEnvironment.UNITY

    by_xdg_name = {
        'GNOME': _LinuxDesktopEnvironment.GNOME,
        'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
        'KDE': _LinuxDesktopEnvironment.KDE,
        'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
        'XFCE': _LinuxDesktopEnvironment.XFCE,
    }
    if current in by_xdg_name:
        return by_xdg_name[current]

    if session is not None:
        if session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in session:
            return _LinuxDesktopEnvironment.XFCE
        # an unrecognised session name does not fall back to the variables below
        return _LinuxDesktopEnvironment.OTHER

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    if 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
677
678
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the detected desktop environment.

    KDE maps to KWALLET, OTHER to BASICTEXT and everything else to GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {environment.name}')
    if environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
693
694
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Queries kwalletd5 over dbus and falls back to 'kdewallet' when the query
    fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    try:
        proc = Popen([
            'dbus-send', '--session', '--print-reply=literal',
            '--dest=org.kde.kwalletd5',
            '/modules/kwalletd5',
            'org.kde.KWallet.networkWallet'
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.decode('utf-8').strip()
        logger.debug('NetworkWallet = "{}"'.format(network_wallet))
        return network_wallet
    # Exception rather than BaseException, so that KeyboardInterrupt and
    # SystemExit still propagate instead of being turned into a warning
    except Exception as e:
        logger.warning('exception while obtaining NetworkWallet: {}'.format(e))
        return default_wallet
724
725
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's 'Safe Storage' password from KWallet via kwallet-query.

    @returns  the password as bytes (one trailing newline removed), or b''
              when kwallet-query is missing, fails, reports that it could not
              read the entry, or raises
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        proc = Popen([
            'kwallet-query',
            '--read-password', '{} Safe Storage'.format(browser_keyring_name),
            '--folder', '{} Keys'.format(browser_keyring_name),
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.error('kwallet-query failed with return code {}. Please consult '
                         'the kwallet-query man page for details'.format(proc.returncode))
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this may be a bug as the intended behaviour is to generate a random password and store
            # it, but that doesn't matter here.
            return b''

        logger.debug('password found')
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    # Exception rather than BaseException, so that KeyboardInterrupt and
    # SystemExit still propagate instead of being turned into a warning
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {type(e).__name__}({e})')
        return b''
769
770
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Read the browser's 'Safe Storage' secret from the default secretstorage collection.

    @returns  the secret, or b'' when secretstorage is unavailable or no item
              with the expected label exists
    """
    if not SECRETSTORAGE_AVAILABLE:
        logger.error(f'secretstorage not available {SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    wanted_label = f'{browser_keyring_name} Safe Storage'
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        logger.error('failed to read from keyring')
        return b''
787
788
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Return the keyring password for the browser, or None for BASICTEXT.

    @param keyring  name of a `_LinuxKeyring` member, or a falsy value to
                    choose the backend from the desktop environment
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    assert False, f'Unknown keyring {keyring}'
807
808
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Read the browser's 'Safe Storage' password with `security find-generic-password`.

    @returns  the password as bytes (one trailing newline removed), or None
              when the command exits with a non-zero status or raises
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        proc = Popen(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', '{} Safe Storage'.format(browser_keyring_name)],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            # without this check a key would be derived from the output of a
            # failed lookup; None lets the decryptor report "no key found"
            logger.warning('find-generic-password failed with return code {}'.format(proc.returncode))
            return None
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    # Exception rather than BaseException, so that KeyboardInterrupt and
    # SystemExit still propagate instead of being turned into a warning
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {type(e).__name__}({e})')
        return None
982ee69a
MB
826
827
def _get_windows_v10_key(browser_root, logger):
    """Load and decrypt the v10 cookie key stored in the browser's ``Local State`` file.

    @returns  the result of `_decrypt_windows_dpapi` on the key, or None when
              the file is not found, has no ``os_crypt.encrypted_key`` entry,
              or the key lacks the ``DPAPI`` prefix
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State')
    if local_state_path is None:
        logger.error('could not find local state file')
        return None

    with open(local_state_path, 'r', encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    wrapped_key = compat_b64decode(encoded_key)
    dpapi_prefix = b'DPAPI'
    if wrapped_key.startswith(dpapi_prefix):
        # only the part after the prefix is DPAPI-encrypted data
        return _decrypt_windows_dpapi(wrapped_key[len(dpapi_prefix):], logger)

    logger.error('invalid key')
    return None
846
847
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key_length-byte key from password and salt using PBKDF2 with HMAC-SHA1."""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
850
851
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt an AES-CBC encrypted cookie value and return it as text.

    @param ciphertext             encrypted bytes
    @param key                    AES key
    @param logger                 logger used to warn (once) about a failed decode
    @param initialization_vector  CBC IV; defaults to 16 space characters
    @returns                      the decoded string, or None if the unpadded plaintext
                                  is not valid UTF-8
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    raw = unpad_pkcs7(padded)
    try:
        text = raw.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
859
860
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and authenticate an AES-GCM encrypted cookie value and return it as text.

    @param ciphertext          encrypted bytes
    @param key                 AES key
    @param nonce               GCM nonce
    @param authentication_tag  GCM tag checked during decryption
    @param logger              logger used to warn (once) about each kind of failure
    @returns                   the decoded string, or None if decryption raised ValueError
                               or the plaintext is not valid UTF-8
    """
    try:
        raw = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        text = raw.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
873
874
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt data with the Windows CryptUnprotectData function.

    Returns the decrypted bytes, or None (after logging a warning once) when the
    call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    in_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(in_buffer), in_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob),  # pDataOut
    )
    if succeeded:
        # copy the output into Python bytes before releasing the returned buffer
        plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
        ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
        return plaintext

    logger.warning('failed to decrypt with DPAPI', only_once=True)
    return None
905
906
def _config_home():
    """Return $XDG_CONFIG_HOME if it is set, otherwise the expanded '~/.config'."""
    try:
        return os.environ['XDG_CONFIG_HOME']
    except KeyError:
        return os.path.expanduser('~/.config')
909
910
def _open_database_copy(database_path, tmpdir):
    """
    Copy the database into tmpdir as 'temporary.sqlite' and return a cursor on the copy.

    @param database_path  path of the sqlite database to copy
    @param tmpdir         directory that receives the copy
    @returns              a cursor from a new sqlite3 connection to the copied file
    """
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
917
918
def _get_column_names(cursor, table_name):
    """
    Return the column names of table_name as a list of str.

    @param cursor      sqlite3 cursor used to run `PRAGMA table_info`
    @param table_name  table to inspect; interpolated directly into the PRAGMA statement,
                       so it must be a trusted identifier
    @returns           column names in the order reported by the PRAGMA
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # Column 1 of each row is the name. It is bytes when the connection's text_factory
    # is bytes and str with the default factory, so only decode when needed.
    return [
        name.decode('utf-8') if isinstance(name, bytes) else name
        for name in (row[1] for row in table_info)
    ]
922
923
def _find_most_recently_used_file(root, filename):
    """
    Search root recursively for files named filename and return the most recently modified.

    @param root      directory to walk
    @param filename  exact file name to look for
    @returns         path of the match with the greatest lstat mtime, or None if none exist
    """
    # if there are multiple browser profiles, take the most recently used one
    candidates = [
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(root)
        if filename in filenames
    ]
    return max(candidates, key=lambda candidate: os.lstat(candidate).st_mtime, default=None)
932
933
def _merge_cookie_jars(jars):
    """
    Combine several cookie jars into a single new YoutubeDLCookieJar.

    Cookies are added in iteration order. The merged jar's filename is taken from the
    last input jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        if source_jar.filename is None:
            continue
        merged.filename = source_jar.filename
    return merged
942
943
def _is_path(value):
    """Return True if value contains the platform's path separator."""
    separator = os.sep
    return separator in value
946
947
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """
    Validate a browser specification and normalise its profile.

    @param browser_name  must be one of SUPPORTED_BROWSERS
    @param profile       profile name or path; a value containing a path separator
                         has '~' expanded
    @param keyring       None or one of SUPPORTED_KEYRINGS
    @returns             the tuple (browser_name, profile, keyring)
    @raises ValueError   if the browser or keyring is not supported
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        if _is_path(profile):
            profile = os.path.expanduser(profile)
    return browser_name, profile, keyring