]> jfr.im git - yt-dlp.git/blob - yt_dlp/cookies.py
[cleanup] Misc fixes
[yt-dlp.git] / yt_dlp / cookies.py
1 import contextlib
2 import ctypes
3 import json
4 import os
5 import shutil
6 import struct
7 import subprocess
8 import sys
9 import tempfile
10 from datetime import datetime, timedelta, timezone
11 from enum import Enum, auto
12 from hashlib import pbkdf2_hmac
13
14 from .aes import (
15 aes_cbc_decrypt_bytes,
16 aes_gcm_decrypt_and_verify_bytes,
17 unpad_pkcs7,
18 )
19 from .compat import (
20 compat_b64decode,
21 compat_cookiejar_Cookie,
22 )
23 from .utils import (
24 error_to_str,
25 expand_path,
26 Popen,
27 YoutubeDLCookieJar,
28 )
29
# Optional dependency probing: record whether each optional module could be
# imported so the extraction code can degrade gracefully instead of crashing.
try:
    import sqlite3
    SQLITE_AVAILABLE = True
except ImportError:
    # although sqlite3 is part of the standard library, it is possible to compile python without
    # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
    SQLITE_AVAILABLE = False


# `secretstorage` is only needed to read the GNOME keyring on Linux.
# SECRETSTORAGE_UNAVAILABLE_REASON is defined only when the import failed;
# it is appended to the error message shown to the user.
try:
    import secretstorage
    SECRETSTORAGE_AVAILABLE = True
except ImportError:
    SECRETSTORAGE_AVAILABLE = False
    SECRETSTORAGE_UNAVAILABLE_REASON = (
        'as the `secretstorage` module is not installed. '
        'Please install by running `python3 -m pip install secretstorage`.')
except Exception as _err:
    # the module exists but raised something other than ImportError while being imported
    SECRETSTORAGE_AVAILABLE = False
    SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'


# Browsers handled by the chromium code path, and the full set of accepted browser names.
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
54
55
class YDLLogger:
    """Logging facade that forwards messages to an optional YoutubeDL-like object.

    When no object was supplied (or it is falsy) every message is dropped.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Pass *message* to ``ydl.write_debug``."""
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        """Pass *message*, prefixed with ``[Cookies]``, to ``ydl.to_screen``."""
        if not self._ydl:
            return
        self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Pass *message* and the *only_once* flag to ``ydl.report_warning``."""
        if not self._ydl:
            return
        self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Pass *message* to ``ydl.report_error``."""
        if not self._ydl:
            return
        self._ydl.report_error(message)
75
76
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file            path of a cookie file, or None
    @param browser_specification  arguments for _parse_browser_specification, or None
    @param ydl                    object handed to YDLLogger for reporting
    @returns                      the result of merging all collected jars
    """
    jars = []

    if browser_specification is not None:
        name, profile, keyring = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(name, profile, YDLLogger(ydl), keyring=keyring)
        jars.append(browser_jar)

    if cookie_file is not None:
        cookie_file = expand_path(cookie_file)
        file_jar = YoutubeDLCookieJar(cookie_file)
        # an unreadable (e.g. not yet existing) file still contributes an empty jar bound to that path
        if os.access(cookie_file, os.R_OK):
            file_jar.load(ignore_discard=True, ignore_expires=True)
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
91
92
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Dispatch to the extractor matching *browser_name*.

    Raises ValueError when the browser is not one of the known names.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
102
103
def _extract_firefox_cookies(profile, logger):
    """Read all cookies from the most recently used firefox ``cookies.sqlite``.

    *profile* may be None (search the whole firefox directory), a path, or a
    profile name relative to the firefox directory. Returns an empty jar when
    sqlite3 is unavailable; raises FileNotFoundError when no database is found.
    """
    logger.info('Extracting cookies from firefox')
    if not SQLITE_AVAILABLE:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    db_path = _find_most_recently_used_file(search_root, 'cookies.sqlite')
    if db_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{db_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # work on a copy: the live database may be locked by the browser
            cursor = _open_database_copy(db_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            for host, name, value, path, expiry, is_secure in cursor.fetchall():
                jar.set_cookie(compat_cookiejar_Cookie(
                    version=0, name=name, value=value, port=None, port_specified=False,
                    domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                    path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                    comment=None, comment_url=None, rest={}))
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
141
142
def _firefox_browser_dir():
    """Return the platform-specific directory that holds firefox profiles.

    Raises ValueError on platforms other than linux, win32 and darwin.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return os.path.expanduser('~/.mozilla/firefox')
    if platform == 'win32':
        return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles')
    if platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    raise ValueError(f'unsupported platform: {platform}')
152
153
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a chromium-based browser.

    The result is a dict with the keys 'browser_dir', 'keyring_name' and
    'supports_profiles'. Raises ValueError on an unsupported platform and
    KeyError for a browser name that is not in the tables below.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        base = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
        }[browser_name]

    elif platform == 'win32':
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        # opera is the only browser here that lives under the roaming profile
        base, relative_dir = {
            'brave': (local, r'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, r'Google\Chrome\User Data'),
            'chromium': (local, r'Chromium\User Data'),
            'edge': (local, r'Microsoft\Edge\User Data'),
            'opera': (roaming, r'Opera Software\Opera Stable'),
            'vivaldi': (local, r'Vivaldi\User Data'),
        }[browser_name]

    elif platform == 'darwin':
        base = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
        }[browser_name]

    else:
        raise ValueError(f'unsupported platform: {platform}')

    browser_dir = os.path.join(base, relative_dir)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
    }[browser_name]

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles,
    }
211
212
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt the cookies of a chromium-based browser.

    *profile* may be None, a path, or a profile name. Cookies whose value
    cannot be decrypted are skipped and counted. Returns an empty jar when
    sqlite3 is unavailable; raises FileNotFoundError when no ``Cookies``
    database is found under the search root.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not SQLITE_AVAILABLE:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    has_profiles = settings['supports_profiles']

    if profile is None:
        search_root = settings['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # the browser root is what the decryptor gets; for profile-aware browsers it is the profile's parent
        settings['browser_dir'] = os.path.dirname(profile) if has_profiles else profile
    elif has_profiles:
        search_root = os.path.join(settings['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = settings['browser_dir']

    db_path = _find_most_recently_used_file(search_root, 'Cookies')
    if db_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{db_path}"')

    decryptor = get_cookie_decryptor(settings['browser_dir'], settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(db_path, tmpdir)
            # fetch text columns as raw bytes; they are decoded explicitly below
            cursor.connection.text_factory = bytes
            columns = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in columns else 'secure'
            cursor.execute('SELECT host_key, name, value, encrypted_value, path, '
                           f'expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = unencrypted_cookies = 0
            for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall():
                host_key, name, value, path = (
                    raw.decode('utf-8') for raw in (host_key, name, value, path))

                if not value and encrypted_value:
                    value = decryptor.decrypt(encrypted_value)
                    if value is None:
                        failed_cookies += 1
                        continue
                else:
                    unencrypted_cookies += 1

                jar.set_cookie(compat_cookiejar_Cookie(
                    version=0, name=name, value=value, port=None, port_specified=False,
                    domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
                    path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
                    comment=None, comment_url=None, rest={}))

            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = {**decryptor.cookie_counts, 'unencrypted': unencrypted_cookies}
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
286
287
class ChromeCookieDecryptor:
    """
    Interface implemented by the per-platform chromium cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    def decrypt(self, encrypted_value):
        """Decrypt one ``encrypted_value`` blob; must be provided by subclasses."""
        raise NotImplementedError()

    @property
    def cookie_counts(self):
        """Per-version counters of the cookies seen; must be provided by subclasses."""
        raise NotImplementedError()
320
321
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor for the current platform.

    Raises NotImplementedError on platforms other than linux, darwin and win32.
    """
    platform = sys.platform
    if platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    raise NotImplementedError('Chrome cookie decryption is not supported '
                              f'on this platform: {platform}')
332
333
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts v10 (fixed key) and v11 (keyring key) cookies on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._v10_key = self.derive_key(b'peanuts')
        keyring_password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        # without a keyring password v11 cookies cannot be decrypted
        self._v11_key = self.derive_key(keyring_password) if keyring_password is not None else None
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from *password*."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value, or None when it cannot be decrypted."""
        # the first three bytes carry the version tag, the rest is the ciphertext
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        self._cookie_counts['other'] += 1
        return None
370
371
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts v10 cookies on macOS; other values are passed through unchanged."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        keychain_password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = self.derive_key(keychain_password) if keychain_password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from *password*."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the cookie value, or None when a v10 cookie cannot be decrypted."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
406
407
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decrypts cookies on Windows: v10 with AES-GCM, everything else with DPAPI."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        # may be None when the key could not be located or decrypted
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value as str, or None when decryption fails."""
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            # kNonceLength
            nonce_length = 96 // 8
            # boringssl
            # EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # layout after the version tag: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # the DPAPI helper signals failure with None (it has already logged a warning);
            # report that as an undecryptable cookie instead of crashing on None.decode
            if plaintext is None:
                return None
            return plaintext.decode('utf-8')
447
448
def _extract_safari_cookies(profile, logger):
    """Read and parse Safari's ``Cookies.binarycookies`` file.

    A non-None *profile* only triggers an error message. Raises ValueError
    off macOS and FileNotFoundError when neither known location exists.
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    primary = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
    if os.path.isfile(primary):
        cookies_path = primary
    else:
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookie_file:
        raw = cookie_file.read()

    jar = parse_safari_cookies(raw, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
469
470
class ParserError(Exception):
    """Raised by DataParser when the binary data cannot be read as requested."""
473
474
class DataParser:
    """Sequential reader over a bytes buffer.

    ``cursor`` is the offset of the next unread byte. Reads past the end of
    the buffer and negative lengths raise ParserError.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next *num_bytes* bytes and advance the cursor."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, end = self.cursor, self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = end
        return self._data[start:end]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and require them to equal *expected_value*."""
        actual = self.read_bytes(len(expected_value))
        if actual != expected_value:
            raise ParserError(f'unexpected value: {actual} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless *big_endian*)."""
        (number,) = struct.unpack('>I' if big_endian else '<I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little-endian unless *big_endian*)."""
        (number,) = struct.unpack('>d' if big_endian else '<d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read bytes up to and including the next NUL and return them decoded as UTF-8."""
        pieces = []
        byte = self.read_bytes(1)
        while byte != b'\x00':
            pieces.append(byte)
            byte = self.read_bytes(1)
        return b''.join(pieces).decode('utf-8')

    def skip(self, num_bytes, description='unknown'):
        """Consume *num_bytes* bytes, logging what was skipped; zero is a no-op."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor lands on *offset*."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the buffer."""
        self.skip_to(len(self._data), description)
525
526
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds since 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())
529
530
def _parse_safari_cookies_header(data, logger):
    """Parse the file header; return ``(page_sizes, offset_of_first_page)``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
537
538
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of cookie records and add each cookie to *jar*."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    cookie_count = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(cookie_count)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    for offset in record_offsets:
        parser.skip_to(offset, 'space between records')
        # the record parser works on its own view; advance this parser by the size it reports
        consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
        parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
555
556
def _parse_safari_cookies_record(data, jar, logger):
    """Parse a single cookie record at the start of *data* and add it to *jar*.

    Returns the record size read from the record itself. A record whose
    strings are not valid UTF-8 is skipped with a warning.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # read to advance the cursor; the value itself is not used
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        strings = []
        for offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
        domain, name, path, value = strings
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
597
598
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the raw contents of a Safari cookie file into *jar* (a new jar when None).

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure are the same
            - there are a few bytes here and there which are skipped during parsing
    """
    if jar is None:
        jar = YoutubeDLCookieJar()
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
614
615
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished when choosing a keyring backend.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    # OTHER is what detection falls back to when nothing else matches
    OTHER = auto()
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()
628
629
class _LinuxKeyring(Enum):
    """
    Keyring backends a chromium-based browser may store its cookie key in.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    # no keyring is involved for BASICTEXT
    BASICTEXT = auto()


# names accepted for the user-supplied keyring option
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
641
642
def _get_linux_desktop_environment(env):
    """
    Determine the desktop environment from the environment mapping *env*.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    session = env.get('DESKTOP_SESSION', None)

    if current_desktop is not None:
        # only the first entry of the colon-separated list is considered
        current_desktop = current_desktop.split(':')[0].strip()

    if current_desktop == 'Unity':
        if session is not None and 'gnome-fallback' in session:
            return _LinuxDesktopEnvironment.GNOME
        return _LinuxDesktopEnvironment.UNITY

    by_current_desktop = {
        'GNOME': _LinuxDesktopEnvironment.GNOME,
        'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
        'KDE': _LinuxDesktopEnvironment.KDE,
        'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
        'XFCE': _LinuxDesktopEnvironment.XFCE,
    }
    if current_desktop in by_current_desktop:
        return by_current_desktop[current_desktop]

    if session is not None:
        if session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        if 'kde' in session:
            return _LinuxDesktopEnvironment.KDE
        if 'xfce' in session:
            return _LinuxDesktopEnvironment.XFCE
    elif 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE

    return _LinuxDesktopEnvironment.OTHER
681
682
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the detected desktop environment.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop.name}')
    if desktop == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING
697
698
def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Falls back to 'kdewallet' when the dbus query fails for any reason.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    command = [
        'dbus-send', '--session', '--print-reply=literal',
        '--dest=org.kde.kwalletd5',
        '/modules/kwalletd5',
        'org.kde.KWallet.networkWallet'
    ]
    try:
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.decode('utf-8').strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
728
729
def _get_kwallet_password(browser_keyring_name, logger):
    """Read the browser's 'Safe Storage' password from KWallet via ``kwallet-query``.

    @param browser_keyring_name  name used to build the entry and folder names
    @param logger                object providing debug/warning/error
    @returns                     the password as bytes, or b'' when it cannot be read
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        # note the trailing space on the second fragment: the adjacent literals are concatenated
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        proc = Popen([
            'kwallet-query',
            '--read-password', '{} Safe Storage'.format(browser_keyring_name),
            '--folder', '{} Keys'.format(browser_keyring_name),
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, stderr = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.error('kwallet-query failed with return code {}. Please consult '
                         'the kwallet-query man page for details'.format(proc.returncode))
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this may be a bug as the intended behaviour is to generate a random password and store
                # it, but that doesn't matter here.
                return b''
            else:
                logger.debug('password found')
                # drop a single trailing newline from the tool's output
                if stdout[-1:] == b'\n':
                    stdout = stdout[:-1]
                return stdout
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
773
774
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Look up the browser's 'Safe Storage' secret in the default secretstorage collection.

    Returns b'' when secretstorage is unavailable or no matching item exists.
    """
    if not SECRETSTORAGE_AVAILABLE:
        logger.error(f'secretstorage not available {SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for item in collection.get_all_items():
            if item.get_label() == wanted_label:
                return item.get_secret()
        logger.error('failed to read from keyring')
        return b''
791
792
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Fetch the cookie-encryption password from the chosen Linux keyring.

    *keyring* is a _LinuxKeyring member name; when falsy the keyring is
    auto-detected. Returns None for the BASICTEXT backend.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        selected = _LinuxKeyring[keyring]
    else:
        selected = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {selected.name}')

    if selected == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if selected == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if selected == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {selected}'
811
812
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Read the browser's 'Safe Storage' password from the macOS keychain.

    @param browser_keyring_name  keychain account name; also used to build the service name
    @param logger                object providing debug/warning
    @returns                     the password as bytes, or None when it could not be obtained
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        proc = Popen(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', '{} Safe Storage'.format(browser_keyring_name)],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, stderr = proc.communicate_or_kill()
        if proc.returncode != 0:
            # without this check a failed lookup would hand back b'' and a key
            # would be derived from the empty password
            logger.warning('find-generic-password failed')
            return None
        # drop a single trailing newline from the tool's output
        if stdout[-1:] == b'\n':
            stdout = stdout[:-1]
        return stdout
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
830
831
def _get_windows_v10_key(browser_root, logger):
    """Load and DPAPI-decrypt the cookie key stored in the browser's ``Local State`` file.

    Returns None (after logging an error) when the file, the key entry or
    the expected 'DPAPI' prefix is missing.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State')
    if local_state_path is None:
        logger.error('could not find local state file')
        return None

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        base64_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    encrypted_key = compat_b64decode(base64_key)
    marker = b'DPAPI'
    if not encrypted_key.startswith(marker):
        logger.error('invalid key')
        return None
    # strip the marker; the remainder is the DPAPI blob
    return _decrypt_windows_dpapi(encrypted_key[len(marker):], logger)
850
851
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive *key_length* bytes from *password* with PBKDF2-HMAC-SHA1."""
    derived = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived
854
855
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """AES-CBC decrypt *ciphertext*, strip the PKCS#7 padding and decode as UTF-8.

    Returns None (after a warning) when the result is not valid UTF-8.
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    plaintext = unpad_pkcs7(padded)
    try:
        decoded = plaintext.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return decoded
863
864
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """AES-GCM decrypt and verify *ciphertext*, then decode the result as UTF-8.

    Returns None (after a warning) when verification raises ValueError or the
    plaintext is not valid UTF-8.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        decoded = decrypted.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return decoded
877
878
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt *ciphertext* with the Windows DPAPI function CryptUnprotectData.

    Returns the decrypted bytes, or None (after logging a warning) when the
    call reports failure. Only usable on Windows: it relies on
    ``ctypes.windll`` and ``ctypes.wintypes``.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    # imported here rather than at module level, so the import only happens when this is called
    from ctypes.wintypes import DWORD

    # length-prefixed byte buffer, the structure used for both the input and the output blob
    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    # NOTE(review): create_string_buffer(bytes) appends a NUL terminator, so sizeof(buffer)
    # is presumably len(ciphertext) + 1 - confirm this is intended
    buffer = ctypes.create_string_buffer(ciphertext)
    blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
    blob_out = DATA_BLOB()
    ret = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(blob_in),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(blob_out)  # pDataOut
    )
    # a falsy return value is treated as failure
    if not ret:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a Python bytes object before releasing the output buffer
    result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
    ctypes.windll.kernel32.LocalFree(blob_out.pbData)
    return result
909
910
def _config_home():
    """Return the user's configuration directory.

    Uses ``$XDG_CONFIG_HOME`` when it is set to a non-empty value and
    ``~/.config`` otherwise; the XDG Base Directory specification treats an
    empty value the same as an unset one.
    """
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
913
914
def _open_database_copy(database_path, tmpdir):
    """Copy the database into *tmpdir* and return a cursor on the copy.

    The caller is responsible for closing ``cursor.connection``.
    """
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
921
922
def _get_column_names(cursor, table_name):
    """Return the column names of *table_name* as str.

    The names are decoded from bytes, so the cursor's connection must yield
    text as bytes.
    """
    rows = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    names = []
    for row in rows:
        # index 1 of each row is the column name
        names.append(row[1].decode('utf-8'))
    return names
926
927
def _find_most_recently_used_file(root, filename):
    """Return the most recently modified file called *filename* below *root*, or None."""
    # if there are multiple browser profiles, take the most recently used one
    candidates = [
        os.path.join(directory, filename)
        for directory, _, entries in os.walk(root)
        if filename in entries
    ]
    if not candidates:
        return None
    return max(candidates, key=lambda candidate: os.lstat(candidate).st_mtime)
936
937
def _merge_cookie_jars(jars):
    """Copy the cookies of every jar into one new jar.

    The merged jar takes the filename of the last jar that has one.
    """
    merged = YoutubeDLCookieJar()
    for source in jars:
        for cookie in source:
            merged.set_cookie(cookie)
        source_filename = source.filename
        if source_filename is not None:
            merged.filename = source_filename
    return merged
946
947
def _is_path(value):
    """Return True when *value* contains a path separator.

    Both ``os.path.sep`` and, where the platform defines one,
    ``os.path.altsep`` count (e.g. both ``\\`` and ``/`` on Windows).
    """
    return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
950
951
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """Validate a browser specification and return it as ``(browser_name, profile, keyring)``.

    A profile that is a path has ``~`` expanded. Raises ValueError for an
    unsupported browser or keyring name.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring is not None and keyring not in SUPPORTED_KEYRINGS:
        raise ValueError(f'unsupported keyring: "{keyring}"')
    if profile is not None and _is_path(profile):
        profile = os.path.expanduser(profile)
    return browser_name, profile, keyring