]> jfr.im git - yt-dlp.git/blame - yt_dlp/cookies.py
[GoogleSearch] Fix extractor
[yt-dlp.git] / yt_dlp / cookies.py
CommitLineData
f59f5ef8 1import contextlib
982ee69a
MB
2import ctypes
3import json
4import os
5import shutil
982ee69a
MB
6import struct
7import subprocess
8import sys
9import tempfile
10from datetime import datetime, timedelta, timezone
f59f5ef8 11from enum import Enum, auto
982ee69a
MB
12from hashlib import pbkdf2_hmac
13
09906f55
ÁS
14from .aes import aes_cbc_decrypt_bytes, aes_gcm_decrypt_and_verify_bytes
15from .compat import (
982ee69a
MB
16 compat_b64decode,
17 compat_cookiejar_Cookie,
18)
09906f55 19from .utils import (
982ee69a 20 expand_path,
d3c93ec2 21 Popen,
982ee69a
MB
22 YoutubeDLCookieJar,
23)
24
767b02a9
MB
try:
    import sqlite3
except ImportError:
    # sqlite3 ships with the standard library, but python can be compiled without
    # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
    SQLITE_AVAILABLE = False
else:
    SQLITE_AVAILABLE = True


try:
    import secretstorage
except ImportError:
    SECRETSTORAGE_AVAILABLE = False
    SECRETSTORAGE_UNAVAILABLE_REASON = (
        'as the `secretstorage` module is not installed. '
        'Please install by running `python3 -m pip install secretstorage`.')
except Exception as _err:
    # the module exists but failed while importing
    SECRETSTORAGE_AVAILABLE = False
    SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
else:
    SECRETSTORAGE_AVAILABLE = True


CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
49
50
class YDLLogger:
    """Forward log messages to the given ``ydl`` object; do nothing when it is falsy."""

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Pass ``message`` to ``ydl.write_debug``."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Pass ``message``, prefixed with ``[Cookies] ``, to ``ydl.to_screen``."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Pass ``message`` and ``only_once`` to ``ydl.report_warning``."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Pass ``message`` to ``ydl.report_error``."""
        if not self._ydl:
            return
        self._ydl.report_error(message)
70
71
def load_cookies(cookie_file, browser_specification, ydl):
    """
    Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file            path of a cookie file, or None
    @param browser_specification  arguments for _parse_browser_specification, or None
    @param ydl                    object wrapped in a YDLLogger for browser extraction
    @returns                      the result of merging every jar that was collected
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        cookie_file = expand_path(cookie_file)
        file_jar = YoutubeDLCookieJar(cookie_file)
        # the jar is added even when the file cannot be read; it is only loaded when readable
        if os.access(cookie_file, os.R_OK):
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
86
87
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """
    Dispatch to the extractor matching ``browser_name``.

    ``keyring`` is only forwarded to the chromium-based extractor.
    Raises ValueError for a browser name that is not handled here.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
97
98
def _extract_firefox_cookies(profile, logger):
    """
    Read every row of ``moz_cookies`` from a firefox ``cookies.sqlite`` into a new jar.

    ``profile`` may be None (search the firefox directory), a path, or a profile
    name relative to the firefox directory. Returns an empty jar when sqlite3 is
    unavailable; raises FileNotFoundError when no database is found.
    """
    logger.info('Extracting cookies from firefox')
    if not SQLITE_AVAILABLE:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite')
    if database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    # the query runs against a copy of the database placed in a temporary directory
    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            for row in cursor.fetchall():
                host, name, value, path, expiry, is_secure = row
                jar.set_cookie(compat_cookiejar_Cookie(
                    version=0, name=name, value=value, port=None, port_specified=False,
                    domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                    path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                    comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
136
137
def _firefox_browser_dir():
    """Return the per-platform firefox directory; raise ValueError on other platforms."""
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return os.path.expanduser('~/.mozilla/firefox')
    if platform == 'win32':
        return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles')
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    raise ValueError(f'unsupported platform: {platform}')
147
148
def _get_chromium_based_browser_settings(browser_name):
    """
    Return a dict with 'browser_dir', 'keyring_name' and 'supports_profiles'
    for a chromium-based browser on the current platform.

    Raises ValueError on an unsupported platform and KeyError for a browser
    name missing from the tables below.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        config_root = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]
        browser_dir = os.path.join(config_root, relative_dir)

    elif platform == 'win32':
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        # opera is the only entry rooted in the roaming directory
        root, relative_dir = {
            'brave': (local, r'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, r'Google\Chrome\User Data'),
            'chromium': (local, r'Chromium\User Data'),
            'edge': (local, r'Microsoft\Edge\User Data'),
            'opera': (roaming, r'Opera Software\Opera Stable'),
            'vivaldi': (local, r'Vivaldi\User Data'),
        }[browser_name]
        browser_dir = os.path.join(root, relative_dir)

    elif platform == 'darwin':
        support_root = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]
        browser_dir = os.path.join(support_root, relative_dir)

    else:
        raise ValueError(f'unsupported platform: {platform}')

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = sys.platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }[browser_name]

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles,
    }
206
207
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """
    Read the ``cookies`` table of a chromium-based browser into a new jar.

    Rows with an empty ``value`` and a non-empty ``encrypted_value`` are passed
    through the platform decryptor; rows that fail to decrypt are skipped and
    counted. Returns an empty jar when sqlite3 is unavailable; raises
    FileNotFoundError when no ``Cookies`` database is found.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not SQLITE_AVAILABLE:
        logger.warning(
            f'Cannot extract cookies from {browser_name} without sqlite3 support. '
            'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)
    supports_profiles = config['supports_profiles']
    browser_dir = config['browser_dir']

    if profile is None:
        search_root = browser_dir
    elif _is_path(profile):
        search_root = profile
        # the directory handed to the decryptor follows the explicit profile path
        browser_dir = os.path.dirname(profile) if supports_profiles else profile
    elif supports_profiles:
        search_root = os.path.join(browser_dir, profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = browser_dir

    database_path = _find_most_recently_used_file(search_root, 'Cookies')
    if database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{database_path}"')

    decryptor = get_cookie_decryptor(browser_dir, config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(database_path, tmpdir)
            # text columns come back as bytes and are decoded explicitly below
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(
                'SELECT host_key, name, value, encrypted_value, path, '
                f'expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            for row in cursor.fetchall():
                host_key, name, value, encrypted_value, path, expires_utc, is_secure = row
                host_key = host_key.decode('utf-8')
                name = name.decode('utf-8')
                value = value.decode('utf-8')
                path = path.decode('utf-8')

                if not value and encrypted_value:
                    value = decryptor.decrypt(encrypted_value)
                    if value is None:
                        failed_cookies += 1
                        continue
                else:
                    unencrypted_cookies += 1

                jar.set_cookie(compat_cookiejar_Cookie(
                    version=0, name=name, value=value, port=None, port_specified=False,
                    domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
                    path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
                    comment=None, comment_url=None, rest={}))

            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor.cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
281
282
class ChromeCookieDecryptor:
    """
    Interface implemented by the per-platform chromium cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    def decrypt(self, encrypted_value):
        """Decrypt one ``encrypted_value``; subclasses must override."""
        raise NotImplementedError

    @property
    def cookie_counts(self):
        """Per-version counters kept by the decryptor; subclasses must override."""
        raise NotImplementedError
315
982ee69a 316
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """
    Create the decryptor for the current platform.

    ``keyring`` is only used on linux and ``browser_root`` only on windows.
    Raises NotImplementedError on any other platform.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError(
        f'Chrome cookie decryption is not supported on this platform: {platform}')
327
328
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypt v10 (fixed password) and v11 (keyring password) cookies on linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._v10_key = self.derive_key(b'peanuts')
        keyring_password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        # without a keyring password there is no v11 key
        self._v11_key = self.derive_key(keyring_password) if keyring_password is not None else None
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None when it cannot be decrypted."""
        # the first three bytes are the version tag, the rest is the ciphertext
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        self._cookie_counts['other'] += 1
        return None
365
366
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypt v10 cookies on mac; any other prefix is returned unchanged."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        keyring_password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = self.derive_key(keyring_password) if keyring_password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted v10 value (None without a key), else the input as-is."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None
        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
401
402
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypt v10 (AES-GCM) cookies on windows; other prefixes go through DPAPI."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted value; None for a v10 cookie when no key is available."""
        version, payload = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            # NOTE(review): presumably _decrypt_windows_dpapi never returns None here — confirm
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8')

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
        #   kNonceLength
        nonce_length = 96 // 8
        # boringssl
        #   EVP_AEAD_AES_GCM_TAG_LEN
        authentication_tag_length = 16

        # payload layout: nonce, then ciphertext, then authentication tag
        nonce = payload[:nonce_length]
        ciphertext = payload[nonce_length:-authentication_tag_length]
        authentication_tag = payload[-authentication_tag_length:]

        return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
442
443
def _extract_safari_cookies(profile, logger):
    """
    Parse ``~/Library/Cookies/Cookies.binarycookies`` into a cookie jar.

    A non-None ``profile`` is reported as an error but otherwise ignored.
    Raises ValueError off darwin and FileNotFoundError when the file is missing.
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    if not os.path.isfile(cookies_path):
        raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookies_file:
        raw = cookies_file.read()

    jar = parse_safari_cookies(raw, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
461
462
class ParserError(Exception):
    """Raised by DataParser on an invalid read, an unexpected value or the end of input."""
465
466
class DataParser:
    """Sequential reader over a bytes buffer; ``cursor`` is the current read offset."""

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, end = self.cursor, self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = end
        return self._data[start:end]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and raise ParserError unless they match."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless ``big_endian``)."""
        raw = self.read_bytes(4)
        (number,) = struct.unpack('>I' if big_endian else '<I', raw)
        return number

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little endian unless ``big_endian``)."""
        raw = self.read_bytes(8)
        (number,) = struct.unpack('>d' if big_endian else '<d', raw)
        return number

    def read_cstring(self):
        """Read bytes up to and including a NUL byte; return them decoded as UTF-8 without the NUL."""
        pieces = []
        byte = self.read_bytes(1)
        while byte != b'\x00':
            pieces.append(byte)
            byte = self.read_bytes(1)
        return b''.join(pieces).decode('utf-8')

    def skip(self, num_bytes, description='unknown'):
        """Consume ``num_bytes`` bytes, logging them at debug level; a negative count is an error."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward until the cursor is at ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the buffer."""
        self.skip_to(len(self._data), description)
517
518
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
521
522
def _parse_safari_cookies_header(data, logger):
    """
    Parse the database header: the ``cook`` signature, a big endian page count
    and one big endian size per page.

    @returns (page_sizes, offset just past the header)
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    number_of_pages = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(number_of_pages):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
529
530
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of cookie records, adding every parsed cookie to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = []
    for _ in range(number_of_cookies):
        record_offsets.append(parser.read_uint())
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    for offset in record_offsets:
        parser.skip_to(offset, 'space between records')
        # the record is parsed from its own slice; the page parser is then moved past it
        record_length = _parse_safari_cookies_record(data[offset:], jar, logger)
        parser.read_bytes(record_length)
    parser.skip_to_end('space in between pages')
547
548
def _parse_safari_cookies_record(data, jar, logger):
    """
    Parse one cookie record starting at the beginning of ``data`` and add it to ``jar``.

    A record whose strings are not valid UTF-8 is skipped with a warning.

    @returns the record size read from the first field of the record
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # read and converted although the value itself is unused
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        parser.skip_to(domain_offset)
        domain = parser.read_cstring()

        parser.skip_to(name_offset)
        name = parser.read_cstring()

        parser.skip_to(path_offset)
        path = parser.read_cstring()

        parser.skip_to(value_offset)
        value = parser.read_cstring()
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
589
590
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a safari binary cookies file into ``jar``
    (a new YoutubeDLCookieJar when ``jar`` is None) and return the jar.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure are the same
            - there are a few bytes here and there which are skipped during parsing
    """
    if jar is None:
        jar = YoutubeDLCookieJar()
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
606
607
f59f5ef8
MB
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished when choosing a linux keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """

    OTHER = auto()
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
982ee69a
MB
620
621
f59f5ef8
MB
class _LinuxKeyring(Enum):
    """
    Keyring backends that a password can be read from on linux.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """

    KWALLET = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()


# names of the keyring backends, in definition order
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
633
634
def _get_linux_desktop_environment(env):
    """
    Detect the desktop environment from a mapping of environment variables.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment

    The checks run in three stages, each falling through to the next when it
    does not produce a match (as in the referenced Chromium function):
    XDG_CURRENT_DESKTOP, then DESKTOP_SESSION, then the legacy
    GNOME_DESKTOP_SESSION_ID / KDE_FULL_SESSION variables.

    @param env  mapping such as os.environ
    @returns    a _LinuxDesktopEnvironment member (OTHER when nothing matches)
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        # only the first colon-separated entry is considered
        xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()

        if xdg_current_desktop == 'Unity':
            if desktop_session is not None and 'gnome-fallback' in desktop_session:
                return _LinuxDesktopEnvironment.GNOME
            return _LinuxDesktopEnvironment.UNITY

        known_desktops = {
            'GNOME': _LinuxDesktopEnvironment.GNOME,
            'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
            'KDE': _LinuxDesktopEnvironment.KDE,
            'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
            'XFCE': _LinuxDesktopEnvironment.XFCE,
        }
        if xdg_current_desktop in known_desktops:
            return known_desktops[xdg_current_desktop]
        # an unrecognised value must not stop the remaining checks

    if desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        elif 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
f59f5ef8
MB
673
674
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend for the desktop environment detected from os.environ:
    KDE uses KWALLET, OTHER uses BASICTEXT and everything else GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
689
690
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet

    Falls back to 'kdewallet' (with a warning) when the dbus-send call fails
    or raises.
    """
    default_wallet = 'kdewallet'
    try:
        proc = Popen([
            'dbus-send', '--session', '--print-reply=literal',
            '--dest=org.kde.kwalletd5',
            '/modules/kwalletd5',
            'org.kde.KWallet.networkWallet'
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.decode('utf-8').strip()
        logger.debug('NetworkWallet = "{}"'.format(network_wallet))
        return network_wallet
    except Exception as e:
        # only ordinary errors are swallowed; KeyboardInterrupt and SystemExit
        # must still be able to stop the program
        logger.warning('exception while obtaining NetworkWallet: {}'.format(e))
        return default_wallet
720
721
def _get_kwallet_password(browser_keyring_name, logger):
    """
    Read the '<name> Safe Storage' password from KWallet using kwallet-query.

    @returns the password as bytes with one trailing newline removed, or b''
             when kwallet-query is missing, fails, raises or reports that the
             entry could not be read
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        proc = Popen([
            'kwallet-query',
            '--read-password', '{} Safe Storage'.format(browser_keyring_name),
            '--folder', '{} Keys'.format(browser_keyring_name),
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.error('kwallet-query failed with return code {}. Please consult '
                         'the kwallet-query man page for details'.format(proc.returncode))
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this may be a bug as the intended behaviour is to generate a random password and store
            # it, but that doesn't matter here.
            return b''

        logger.debug('password found')
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        # only ordinary errors are swallowed; KeyboardInterrupt and SystemExit
        # must still be able to stop the program
        logger.warning(f'exception running kwallet-query: {type(e).__name__}({e})')
        return b''
765
766
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Return the secret of the default-collection item labelled
    '<name> Safe Storage', or b'' when secretstorage is unavailable or no such
    item exists.
    """
    if not SECRETSTORAGE_AVAILABLE:
        logger.error(f'secretstorage not available {SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    wanted_label = f'{browser_keyring_name} Safe Storage'
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        # no item carried the expected label
        logger.error('failed to read from keyring')
        return b''
783
784
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Obtain the keyring password for a browser on linux.

    ``keyring`` is a _LinuxKeyring member name; when falsy the backend is
    chosen by _choose_linux_keyring. Returns None for the BASICTEXT backend.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {keyring}'
803
804
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the '<name> Safe Storage' password with `security find-generic-password`.

    @returns the password as bytes with one trailing newline removed, or None
             when the command exits with a non-zero status or raises
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        proc = Popen(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', '{} Safe Storage'.format(browser_keyring_name)],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            # a failed lookup must not be mistaken for an (empty) password,
            # otherwise a key would be derived from it
            logger.warning('find-generic-password failed')
            return None
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        # only ordinary errors are swallowed; KeyboardInterrupt and SystemExit
        # must still be able to stop the program
        logger.warning(f'exception running find-generic-password: {type(e).__name__}({e})')
        return None
982ee69a
MB
822
823
def _get_windows_v10_key(browser_root, logger):
    """
    Load the v10 cookie key from the browser's 'Local State' file.

    The base64 value at os_crypt.encrypted_key must start with b'DPAPI' once
    decoded; the remainder is passed to _decrypt_windows_dpapi. Returns None
    (after logging an error) when the file, the entry or the prefix is missing.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State')
    if local_state_path is None:
        logger.error('could not find local state file')
        return None

    with open(local_state_path, 'r', encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        base64_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    encrypted_key = compat_b64decode(base64_key)
    dpapi_prefix = b'DPAPI'
    if not encrypted_key.startswith(dpapi_prefix):
        logger.error('invalid key')
        return None
    return _decrypt_windows_dpapi(encrypted_key[len(dpapi_prefix):], logger)
842
843
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of `key_length` bytes from `password` and `salt` using PBKDF2 with HMAC-SHA1."""
    return pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
846
847
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt `ciphertext` with AES-CBC and return the result as text.

    @returns  the decrypted value decoded as UTF-8, or None (after a one-time warning)
              if the decrypted bytes are not valid UTF-8
    """
    decrypted = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    # the value of the final byte is taken as the number of trailing padding bytes to drop
    pad_size = decrypted[-1]
    unpadded = decrypted[:-pad_size]
    try:
        return unpadded.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
856
857
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and authenticate `ciphertext` with AES-GCM and return the result as text.

    @returns  the decrypted value decoded as UTF-8, or None (after a one-time warning)
              if decryption raised ValueError or the decrypted bytes are not valid UTF-8
    """
    # decryption and decoding are kept in separate try blocks: UnicodeDecodeError is a
    # subclass of ValueError, so a shared block would report decoding problems as MAC failures
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        text = decrypted.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        text = None
    return text
870
871
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling the Windows `CryptUnprotectData` function through ctypes.

    @returns  the decrypted bytes, or None (after a one-time warning) if the call reports failure

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [
            ('cbData', DWORD),
            ('pbData', ctypes.POINTER(ctypes.c_char)),
        ]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before the output buffer is released
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
902
903
def _config_home():
    """
    Return the user's configuration directory.

    Uses the XDG_CONFIG_HOME environment variable and falls back to `~/.config`
    (with `~` expanded) when the variable is unset or empty.
    """
    # `or` instead of a .get() default: an empty value must be treated like an unset one
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
906
907
def _open_database_copy(database_path, tmpdir):
    """Copy the database at `database_path` to 'temporary.sqlite' inside `tmpdir` and return a cursor on the copy."""
    # cannot open sqlite databases if they are already in use (e.g. by the browser),
    # so the queries are run against a copy instead
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
914
915
def _get_column_names(cursor, table_name):
    """
    Return the column names of `table_name` as a list of str.

    @param cursor      sqlite3 cursor used to run `PRAGMA table_info`
    @param table_name  name of the table; interpolated into the statement as-is
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()

    def as_text(name):
        # names are bytes when the connection's text_factory is bytes, and str with the default
        return name.decode('utf-8') if isinstance(name, bytes) else name

    # index 1 of every table_info row holds the column name
    return [as_text(row[1]) for row in table_info]
919
920
def _find_most_recently_used_file(root, filename):
    """
    Search `root` recursively for files named `filename` and return the path of the one
    with the latest modification time, or None if there is no such file.
    """
    # if there are multiple browser profiles, take the most recently used one
    candidates = [
        os.path.join(directory, entry)
        for directory, _, entries in os.walk(root)
        for entry in entries
        if entry == filename
    ]
    if not candidates:
        return None
    return max(candidates, key=lambda candidate: os.lstat(candidate).st_mtime)
929
930
def _merge_cookie_jars(jars):
    """
    Combine the cookies of all `jars` into one new YoutubeDLCookieJar.

    The merged jar takes its filename from the last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        source_filename = source_jar.filename
        if source_filename is not None:
            merged.filename = source_filename
    return merged
939
940
def _is_path(value):
    """Return True if `value` contains a path separator of the current platform."""
    # os.path.altsep is None on platforms with a single separator; where it exists
    # (e.g. '/' on Windows) it has to be recognised as well
    return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
943
944
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """
    Validate a browser specification and return it as a `(browser_name, profile, keyring)` tuple.

    A profile that looks like a path gets a leading `~` expanded; any other profile is
    returned unchanged.

    @raises ValueError  if `browser_name` is not in SUPPORTED_BROWSERS, or `keyring` is
                        neither None nor one of SUPPORTED_KEYRINGS
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring not in (None, *SUPPORTED_KEYRINGS):
        raise ValueError(f'unsupported keyring: "{keyring}"')

    profile_is_path = profile is not None and _is_path(profile)
    resolved_profile = os.path.expanduser(profile) if profile_is_path else profile
    return browser_name, resolved_profile, keyring