]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[ruutu] Detect embeds (#3294)
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
10from datetime import datetime, timedelta, timezone
f59f5ef8 11from enum import Enum, auto
982ee69a
MB
12from hashlib import pbkdf2_hmac
13
1d3586d0 14from .aes import (
15 aes_cbc_decrypt_bytes,
16 aes_gcm_decrypt_and_verify_bytes,
17 unpad_pkcs7,
18)
09906f55 19from .compat import (
982ee69a
MB
20 compat_b64decode,
21 compat_cookiejar_Cookie,
22)
09906f55 23from .utils import (
982ee69a 24 expand_path,
d3c93ec2 25 Popen,
982ee69a
MB
26 YoutubeDLCookieJar,
27)
28
767b02a9
MB
# Probe for the optional sqlite3 module; the browser extractors check this flag before use.
try:
    import sqlite3
except ImportError:
    # although sqlite3 is part of the standard library, it is possible to compile python without
    # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
    SQLITE_AVAILABLE = False
else:
    SQLITE_AVAILABLE = True
36
37
# Probe for the optional `secretstorage` module. When it cannot be used,
# SECRETSTORAGE_UNAVAILABLE_REASON holds a human readable explanation.
try:
    import secretstorage
except ImportError:
    SECRETSTORAGE_AVAILABLE = False
    SECRETSTORAGE_UNAVAILABLE_REASON = (
        'as the `secretstorage` module is not installed. '
        'Please install by running `python3 -m pip install secretstorage`.')
except Exception as _err:
    # the module exists but failed while being imported
    SECRETSTORAGE_AVAILABLE = False
    SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
else:
    SECRETSTORAGE_AVAILABLE = True
982ee69a
MB
49
50
# Browser names that are routed through the shared Chromium extraction path.
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name that extract_cookies_from_browser knows how to handle.
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS.union(('firefox', 'safari'))
53
54
class YDLLogger:
    """Logging adapter that forwards messages to the wrapped ``ydl`` object.

    Every method is a silent no-op when no (or a falsy) ``ydl`` was given.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward ``message`` to ``ydl.write_debug``."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Forward ``message`` to ``ydl.to_screen`` with a ``[Cookies]`` prefix."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward ``message`` and the ``only_once`` flag to ``ydl.report_warning``."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward ``message`` to ``ydl.report_error``."""
        if not self._ydl:
            return
        self._ydl.report_error(message)
74
75
def load_cookies(cookie_file, browser_specification, ydl):
    """Collect cookies from a browser and/or a cookie file into one merged jar.

    @param cookie_file            path of a cookie file, or None to skip it
    @param browser_specification  arguments for _parse_browser_specification, or None to skip
    @param ydl                    object wrapped in a YDLLogger for browser extraction
    @returns                      the result of _merge_cookie_jars over the collected jars
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        cookie_file = expand_path(cookie_file)
        file_jar = YoutubeDLCookieJar(cookie_file)
        # an unreadable file still contributes an (empty) jar bound to that path
        if os.access(cookie_file, os.R_OK):
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
90
91
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch cookie extraction to the extractor matching ``browser_name``.

    ``keyring`` is only passed on to the Chromium extractor.
    Raises ValueError for a browser name that no extractor handles.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
101
102
def _extract_firefox_cookies(profile, logger):
    """Read all cookies from the most recently used firefox ``cookies.sqlite``.

    ``profile`` may be None (search the whole firefox directory), a path, or a
    profile name relative to the firefox directory.
    Returns an empty jar when sqlite3 is unavailable; raises FileNotFoundError
    when no cookie database can be located.
    """
    logger.info('Extracting cookies from firefox')
    if not SQLITE_AVAILABLE:
        logger.warning(
            'Cannot extract cookies from firefox without sqlite3 support. '
            'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite')
    if database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    # the query runs against a copy of the database made inside a throwaway directory
    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            for host, name, value, path, expiry, is_secure in cursor.fetchall():
                jar.set_cookie(compat_cookiejar_Cookie(
                    version=0, name=name, value=value, port=None, port_specified=False,
                    domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                    path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                    comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
140
141
142def _firefox_browser_dir():
143 if sys.platform in ('linux', 'linux2'):
144 return os.path.expanduser('~/.mozilla/firefox')
145 elif sys.platform == 'win32':
146 return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles')
147 elif sys.platform == 'darwin':
148 return os.path.expanduser('~/Library/Application Support/Firefox')
149 else:
150 raise ValueError('unsupported platform: {}'.format(sys.platform))
151
152
def _get_chromium_based_browser_settings(browser_name):
    """Return per-browser settings for a Chromium based browser on this platform.

    @returns a dict with the keys
             'browser_dir'       - the browser's user data directory
             'keyring_name'      - name used to look up the key in the OS keyring
             'supports_profiles' - False for browsers without profile directories
    Raises ValueError on unsupported platforms and KeyError for unknown browsers.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        config_dir = _config_home()
        data_dirs = {
            'brave': os.path.join(config_dir, 'BraveSoftware/Brave-Browser'),
            'chrome': os.path.join(config_dir, 'google-chrome'),
            'chromium': os.path.join(config_dir, 'chromium'),
            'edge': os.path.join(config_dir, 'microsoft-edge'),
            'opera': os.path.join(config_dir, 'opera'),
            'vivaldi': os.path.join(config_dir, 'vivaldi'),
        }

    elif platform == 'win32':
        local_dir = os.path.expandvars('%LOCALAPPDATA%')
        roaming_dir = os.path.expandvars('%APPDATA%')
        data_dirs = {
            'brave': os.path.join(local_dir, r'BraveSoftware\Brave-Browser\User Data'),
            'chrome': os.path.join(local_dir, r'Google\Chrome\User Data'),
            'chromium': os.path.join(local_dir, r'Chromium\User Data'),
            'edge': os.path.join(local_dir, r'Microsoft\Edge\User Data'),
            'opera': os.path.join(roaming_dir, r'Opera Software\Opera Stable'),
            'vivaldi': os.path.join(local_dir, r'Vivaldi\User Data'),
        }

    elif platform == 'darwin':
        support_dir = os.path.expanduser('~/Library/Application Support')
        data_dirs = {
            'brave': os.path.join(support_dir, 'BraveSoftware/Brave-Browser'),
            'chrome': os.path.join(support_dir, 'Google/Chrome'),
            'chromium': os.path.join(support_dir, 'Chromium'),
            'edge': os.path.join(support_dir, 'Microsoft Edge'),
            'opera': os.path.join(support_dir, 'com.operasoftware.Opera'),
            'vivaldi': os.path.join(support_dir, 'Vivaldi'),
        }

    else:
        raise ValueError(f'unsupported platform: {platform}')

    browser_dir = data_dirs[browser_name]

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = platform == 'darwin'
    keyring_names = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }
    keyring_name = keyring_names[browser_name]

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles
    }
210
211
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read (and decrypt where needed) all cookies of a Chromium based browser.

    ``profile`` may be None, a path, or a profile name inside the browser's
    data directory. ``keyring`` is handed to the cookie decryptor.
    Returns an empty jar when sqlite3 is unavailable; raises FileNotFoundError
    when no ``Cookies`` database can be located.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not SQLITE_AVAILABLE:
        logger.warning(
            f'Cannot extract cookies from {browser_name} without sqlite3 support. '
            'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    has_profiles = settings['supports_profiles']

    if profile is None:
        search_root = settings['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # with profiles, the browser directory is the parent of the profile directory
        settings['browser_dir'] = os.path.dirname(profile) if has_profiles else profile
    elif has_profiles:
        search_root = os.path.join(settings['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = settings['browser_dir']

    database_path = _find_most_recently_used_file(search_root, 'Cookies')
    if database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    decryptor = get_cookie_decryptor(settings['browser_dir'], settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            # fetch text columns as raw bytes; they are decoded explicitly below
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            # the column holding the secure flag is called either `is_secure` or `secure`
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(
                'SELECT host_key, name, value, encrypted_value, path, '
                f'expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall():
                host_key = host_key.decode('utf-8')
                name = name.decode('utf-8')
                value = value.decode('utf-8')
                path = path.decode('utf-8')

                if not value and encrypted_value:
                    value = decryptor.decrypt(encrypted_value)
                    if value is None:
                        # undecryptable cookies are counted and left out of the jar
                        failed_cookies += 1
                        continue
                else:
                    unencrypted_cookies += 1

                jar.set_cookie(compat_cookiejar_Cookie(
                    version=0, name=name, value=value, port=None, port_specified=False,
                    domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
                    path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
                    comment=None, comment_url=None, rest={}))

            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor.cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
285
286
class ChromeCookieDecryptor:
    """
    Interface of the platform specific Chromium cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value; must be provided by subclasses."""
        raise NotImplementedError()

    @property
    def cookie_counts(self):
        """Per-version counters of the cookies seen; must be provided by subclasses."""
        raise NotImplementedError()
319
982ee69a 320
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the ChromeCookieDecryptor implementation for the current platform.

    ``browser_root`` is only used on Windows, ``browser_keyring_name`` on
    Linux and Mac, and ``keyring`` on Linux.
    Raises NotImplementedError on any other platform.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError(
        f'Chrome cookie decryption is not supported on this platform: {platform}')
331
332
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts v10 (fixed password) and v11 (keyring password) cookies on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 cookies always use a key derived from this fixed password
        self._v10_key = self.derive_key(b'peanuts')
        keyring_password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        # no keyring password means v11 cookies cannot be decrypted
        self._v11_key = self.derive_key(keyring_password) if keyring_password is not None else None
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value, or None when it cannot be decrypted."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        # unknown version prefix
        self._cookie_counts['other'] += 1
        return None
369
370
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts v10 cookies on Mac with a key derived from the keychain password."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        keychain_password = _get_mac_keyring_password(browser_keyring_name, logger)
        # without a keychain password v10 cookies cannot be decrypted
        self._v10_key = self.derive_key(keychain_password) if keychain_password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the cookie value, or None when a v10 cookie cannot be decrypted."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            # NOTE(review): the input is returned undecoded here, whereas the v10 branch
            # returns whatever _decrypt_aes_cbc produces - confirm callers cope with both
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
405
406
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts v10 (AES-GCM) and DPAPI protected cookies on Windows."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value; None when a v10 cookie cannot be decrypted."""
        version, payload = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            # NOTE(review): `.decode` assumes the helper always returns bytes - confirm it
            # cannot return None on failure
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8')

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
        #   kNonceLength
        nonce_length = 96 // 8
        # boringssl
        #   EVP_AEAD_AES_GCM_TAG_LEN
        authentication_tag_length = 16

        # payload layout: nonce | ciphertext | authentication tag
        nonce = payload[:nonce_length]
        ciphertext = payload[nonce_length:-authentication_tag_length]
        authentication_tag = payload[-authentication_tag_length:]

        return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
446
447
448def _extract_safari_cookies(profile, logger):
449 if profile is not None:
450 logger.error('safari does not support profiles')
451 if sys.platform != 'darwin':
452 raise ValueError('unsupported platform: {}'.format(sys.platform))
453
454 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
455
456 if not os.path.isfile(cookies_path):
1f7db853
MP
457 logger.debug('Trying secondary cookie location')
458 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
459 if not os.path.isfile(cookies_path):
460 raise FileNotFoundError('could not find safari cookies database')
982ee69a
MB
461
462 with open(cookies_path, 'rb') as f:
463 cookies_data = f.read()
464
465 jar = parse_safari_cookies(cookies_data, logger=logger)
466 logger.info('Extracted {} cookies from safari'.format(len(jar)))
467 return jar
468
469
class ParserError(Exception):
    """Raised by DataParser on invalid reads or skips and on unexpected data."""
472
473
class DataParser:
    """Sequential reader over a bytes buffer; ``cursor`` is the current read offset."""

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, end = self.cursor, self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = end
        return self._data[start:end]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and raise ParserError unless they match."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless ``big_endian``)."""
        data_format = '>I' if big_endian else '<I'
        (number,) = struct.unpack(data_format, self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little endian unless ``big_endian``)."""
        data_format = '>d' if big_endian else '<d'
        (number,) = struct.unpack(data_format, self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read bytes up to and including a NUL byte; return the part before it decoded as UTF-8."""
        pieces = []
        byte = self.read_bytes(1)
        while byte != b'\x00':
            pieces.append(byte)
            byte = self.read_bytes(1)
        return b''.join(pieces).decode('utf-8')

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes and log them at debug level; negative counts raise."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor ends up at ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the buffer."""
        self.skip_to(len(self._data), description)
524
525
526def _mac_absolute_time_to_posix(timestamp):
527 return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
528
529
def _parse_safari_cookies_header(data, logger):
    """Parse the database header.

    @returns (page_sizes, offset) - the size of every page and the offset
             in ``data`` at which the header ends
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    # all header integers are big endian
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
536
537
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of cookie records and add every cookie found to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    for offset in record_offsets:
        parser.skip_to(offset, 'space between records')
        # the record parser works on its own slice and reports how long the record is
        consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
        parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
554
555
def _parse_safari_cookies_record(data, jar, logger):
    """Parse a single cookie record starting at ``data[0]`` and add it to ``jar``.

    A record whose strings are not valid UTF-8 is skipped with a warning.
    @returns the record size as stored in the record itself
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    parser.skip(4, 'unknown record field 2')
    # offsets of the NUL terminated strings, in the order they are stored
    domain_offset, name_offset, path_offset, value_offset = (parser.read_uint() for _ in range(4))
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841
    is_secure = bool(flags & 0x0001)

    strings = []
    try:
        for string_offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(string_offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
596
597
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a safari binary cookies file into ``jar``
    (a new YoutubeDLCookieJar when ``jar`` is None) and return the jar.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
613
614
f59f5ef8
MB
615class _LinuxDesktopEnvironment(Enum):
616 """
617 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
618 DesktopEnvironment
619 """
620 OTHER = auto()
621 CINNAMON = auto()
622 GNOME = auto()
623 KDE = auto()
624 PANTHEON = auto()
625 UNITY = auto()
626 XFCE = auto()
982ee69a
MB
627
628
f59f5ef8
MB
629class _LinuxKeyring(Enum):
630 """
631 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
632 SelectedLinuxBackend
633 """
634 KWALLET = auto()
635 GNOMEKEYRING = auto()
636 BASICTEXT = auto()
637
638
639SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
640
641
def _get_linux_desktop_environment(env):
    """
    Determine the desktop environment from the environment mapping ``env``.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    current_desktop = env.get('XDG_CURRENT_DESKTOP')
    session = env.get('DESKTOP_SESSION')
    if current_desktop is not None:
        # only the first entry of a colon separated list is considered
        current_desktop = current_desktop.split(':')[0].strip()

    if current_desktop == 'Unity':
        if session is not None and 'gnome-fallback' in session:
            return _LinuxDesktopEnvironment.GNOME
        return _LinuxDesktopEnvironment.UNITY

    by_current_desktop = {
        'GNOME': _LinuxDesktopEnvironment.GNOME,
        'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
        'KDE': _LinuxDesktopEnvironment.KDE,
        'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
        'XFCE': _LinuxDesktopEnvironment.XFCE,
    }
    if current_desktop in by_current_desktop:
        return by_current_desktop[current_desktop]

    if session is not None:
        if session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in session:
            return _LinuxDesktopEnvironment.XFCE
    # these variables are only consulted when DESKTOP_SESSION is not set at all
    elif 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
680
681
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend based on the detected desktop environment.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    # every other recognised desktop environment uses the gnome keyring
    return _LinuxKeyring.GNOMEKEYRING
696
697
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet

    @returns the wallet name reported over dbus, or 'kdewallet' when the
             dbus call fails or raises
    """
    default_wallet = 'kdewallet'
    try:
        proc = Popen([
            'dbus-send', '--session', '--print-reply=literal',
            '--dest=org.kde.kwalletd5',
            '/modules/kwalletd5',
            'org.kde.KWallet.networkWallet'
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet
        else:
            network_wallet = stdout.decode('utf-8').strip()
            logger.debug('NetworkWallet = "{}"'.format(network_wallet))
            return network_wallet
    # only ordinary errors fall back to the default wallet; KeyboardInterrupt
    # and SystemExit must propagate so that the user can still abort
    except Exception as e:
        logger.warning('exception while obtaining NetworkWallet: {}'.format(e))
        return default_wallet
727
728
729def _get_kwallet_password(browser_keyring_name, logger):
730 logger.debug('using kwallet-query to obtain password from kwallet')
731
732 if shutil.which('kwallet-query') is None:
733 logger.error('kwallet-query command not found. KWallet and kwallet-query '
734 'must be installed to read from KWallet. kwallet-query should be'
735 'included in the kwallet package for your distribution')
736 return b''
737
738 network_wallet = _get_kwallet_network_wallet(logger)
739
740 try:
741 proc = Popen([
742 'kwallet-query',
743 '--read-password', '{} Safe Storage'.format(browser_keyring_name),
744 '--folder', '{} Keys'.format(browser_keyring_name),
745 network_wallet
746 ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
747
748 stdout, stderr = proc.communicate_or_kill()
749 if proc.returncode != 0:
750 logger.error('kwallet-query failed with return code {}. Please consult '
751 'the kwallet-query man page for details'.format(proc.returncode))
752 return b''
753 else:
754 if stdout.lower().startswith(b'failed to read'):
755 logger.debug('failed to read password from kwallet. Using empty string instead')
756 # this sometimes occurs in KDE because chrome does not check hasEntry and instead
757 # just tries to read the value (which kwallet returns "") whereas kwallet-query
758 # checks hasEntry. To verify this:
759 # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
760 # while starting chrome.
761 # this may be a bug as the intended behaviour is to generate a random password and store
762 # it, but that doesn't matter here.
763 return b''
764 else:
765 logger.debug('password found')
766 if stdout[-1:] == b'\n':
767 stdout = stdout[:-1]
768 return stdout
769 except BaseException as e:
770 logger.warning(f'exception running kwallet-query: {type(e).__name__}({e})')
771 return b''
772
773
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Look up the '<browser_keyring_name> Safe Storage' secret via secretstorage.

    @returns the secret of the first item with a matching label, or b'' when
             secretstorage is unavailable or no such item exists
    """
    if not SECRETSTORAGE_AVAILABLE:
        logger.error(f'secretstorage not available {SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for item in collection.get_all_items():
            if item.get_label() == f'{browser_keyring_name} Safe Storage':
                return item.get_secret()
        # reached only when no item carried the expected label
        logger.error('failed to read from keyring')
        return b''
790
791
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Fetch the cookie encryption password from the selected Linux keyring.

    ``keyring`` is the name of a _LinuxKeyring member; when it is falsy the
    backend is chosen by _choose_linux_keyring.
    @returns the password, or None for the BASICTEXT backend
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {keyring}'
810
811
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Read the '<browser_keyring_name> Safe Storage' password with the `security` tool.

    @returns the tool's stdout as bytes with one trailing newline removed,
             or None when running the tool raised an exception
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        proc = Popen(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', '{} Safe Storage'.format(browser_keyring_name)],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    # only ordinary errors are turned into "no password"; KeyboardInterrupt
    # and SystemExit must propagate so that the user can still abort
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {type(e).__name__}({e})')
        return None
982ee69a
MB
829
830
def _get_windows_v10_key(browser_root, logger):
    """
    Read the encrypted key from the browser's 'Local State' file and decrypt it with DPAPI.

    @param browser_root  directory searched for the most recently used 'Local State' file
    @param logger        logger that receives an error message for every failure case
    @returns             the result of _decrypt_windows_dpapi, or None when the file is
                         missing, holds no encrypted key, or the key lacks the DPAPI prefix
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State')
    if local_state_path is None:
        logger.error('could not find local state file')
        return None

    with open(local_state_path, 'r', encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    wrapped_key = compat_b64decode(encoded_key)
    dpapi_marker = b'DPAPI'
    if wrapped_key.startswith(dpapi_marker):
        # strip the marker; only the remainder is handed to DPAPI
        return _decrypt_windows_dpapi(wrapped_key[len(dpapi_marker):], logger)

    logger.error('invalid key')
    return None
849
850
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of key_length bytes with PBKDF2 using HMAC-SHA1."""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
853
854
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt ciphertext with AES-CBC, strip the PKCS#7 padding and decode it as UTF-8.

    @param ciphertext             encrypted bytes
    @param key                    AES key
    @param logger                 logger that receives a warning (with only_once=True)
                                  if decoding fails
    @param initialization_vector  CBC initialization vector; defaults to 16 space bytes
    @returns                      the decoded text, or None if it is not valid UTF-8
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    plaintext = unpad_pkcs7(padded)
    try:
        text = plaintext.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
862
863
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and verify ciphertext with AES-GCM, then decode the result as UTF-8.

    @param ciphertext          encrypted bytes
    @param key                 AES key
    @param nonce               nonce used for the encryption
    @param authentication_tag  tag the ciphertext is verified against
    @param logger              logger that receives a warning (with only_once=True) on failure
    @returns                   the decoded text, or None if verification raised ValueError
                               or the plaintext is not valid UTF-8
    """
    try:
        # note the argument order expected by the helper: tag before nonce
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        text = decrypted.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return text
876
877
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt ciphertext by calling CryptUnprotectData from crypt32 through ctypes.

    Relies on ctypes.windll, so it can only run on Windows.

    @param ciphertext  bytes to decrypt
    @param logger      logger that receives a warning (with only_once=True) if the call fails
    @returns           the decrypted bytes, or None if CryptUnprotectData returned a false value

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    # Length + pointer pair used for both the input and the output of the call
    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    # `buffer` must stay referenced for the duration of the call since blob_in points into it.
    # NOTE(review): create_string_buffer appends a NUL terminator, so sizeof(buffer) is
    # len(ciphertext) + 1 - confirm that the extra byte is intended.
    buffer = ctypes.create_string_buffer(ciphertext)
    blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
    blob_out = DATA_BLOB()
    ret = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(blob_in),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(blob_out)  # pDataOut
    )
    if not ret:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # Copy the output into a Python bytes object before handing the buffer to LocalFree
    result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
    ctypes.windll.kernel32.LocalFree(blob_out.pbData)
    return result
908
909
def _config_home():
    """Return $XDG_CONFIG_HOME if the variable is set, otherwise the expanded '~/.config'."""
    try:
        return os.environ['XDG_CONFIG_HOME']
    except KeyError:
        return os.path.expanduser('~/.config')
912
913
def _open_database_copy(database_path, tmpdir):
    """
    Copy the database into tmpdir as 'temporary.sqlite' and return a cursor on the copy.

    Working on a copy is needed because sqlite databases cannot be opened
    while they are in use (e.g. by the browser).
    """
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
920
921
def _get_column_names(cursor, table_name):
    """
    Return the column names of a table as a list of str.

    Names are decoded as UTF-8 when the connection yields bytes (text_factory = bytes)
    and passed through unchanged when it yields str (the sqlite3 default).

    @param cursor      sqlite3 cursor of the database to inspect
    @param table_name  table to describe; it is interpolated into the PRAGMA statement
                       rather than bound, so it must be a trusted identifier
    @returns           list of column names in the order reported by PRAGMA table_info
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # index 1 of every table_info row holds the column name
    names = (row[1] for row in table_info)
    return [name.decode('utf-8') if isinstance(name, bytes) else name for name in names]
925
926
def _find_most_recently_used_file(root, filename):
    """
    Search root recursively for files called filename and return the newest one.

    With multiple browser profiles there can be several matches; the one with the
    greatest modification time (os.lstat) wins. Returns None if nothing matches.
    """
    candidates = [
        os.path.join(dirpath, entry)
        for dirpath, _dirnames, entries in os.walk(root)
        for entry in entries
        if entry == filename
    ]
    if not candidates:
        return None
    return max(candidates, key=lambda candidate: os.lstat(candidate).st_mtime)
935
936
def _merge_cookie_jars(jars):
    """
    Combine several cookie jars into one new YoutubeDLCookieJar.

    Cookies are added jar by jar in the given order. The merged jar takes the
    filename of the last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for entry in source_jar:
            merged.set_cookie(entry)
        source_filename = source_jar.filename
        if source_filename is not None:
            merged.filename = source_filename
    return merged
945
946
def _is_path(value):
    """Return True if value contains the platform's path separator."""
    separator = os.path.sep
    return separator in value
949
950
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """
    Validate a browser specification and normalise its profile.

    @param browser_name  must be one of SUPPORTED_BROWSERS
    @param profile       optional profile; if it contains a path separator, '~' is expanded
    @param keyring       optional keyring name; must be None or one of SUPPORTED_KEYRINGS
    @returns             the tuple (browser_name, profile, keyring)
    @raises ValueError   for an unsupported browser or keyring
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    allowed_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in allowed_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        if _is_path(profile):
            profile = os.path.expanduser(profile)

    return browser_name, profile, keyring