]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/cookies.py
[extractor/youtube] Do not warn on duplicate chapters
[yt-dlp.git] / yt_dlp / cookies.py
index c5fb5ab68cbe48221c2694fac7838766f66911a5..3032d07122c1a1a60c83a9bc2b364d38a4cb2601 100644 (file)
@@ -1,6 +1,7 @@
 import base64
 import contextlib
 import http.cookiejar
+import http.cookies
 import json
 import os
 import re
     sqlite3,
 )
 from .minicurses import MultilinePrinter, QuietMultilinePrinter
-from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path, try_call
+from .utils import (
+    Popen,
+    YoutubeDLCookieJar,
+    error_to_str,
+    expand_path,
+    is_path_like,
+    try_call,
+)
 
 CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
 SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
@@ -91,7 +99,7 @@ def load_cookies(cookie_file, browser_specification, ydl):
             extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))
 
     if cookie_file is not None:
-        is_filename = YoutubeDLCookieJar.is_path(cookie_file)
+        is_filename = is_path_like(cookie_file)
         if is_filename:
             cookie_file = expand_path(cookie_file)
 
@@ -128,12 +136,17 @@ def _extract_firefox_cookies(profile, container, logger):
     else:
         search_root = os.path.join(_firefox_browser_dir(), profile)
 
+    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
+    if cookie_database_path is None:
+        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
+    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
+
     container_id = None
-    if container is not None:
-        containers_path = os.path.join(search_root, 'containers.json')
+    if container not in (None, 'none'):
+        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
         if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
             raise FileNotFoundError(f'could not read containers.json in {search_root}')
-        with open(containers_path, 'r') as containers:
+        with open(containers_path) as containers:
             identities = json.load(containers).get('identities', [])
         container_id = next((context.get('userContextId') for context in identities if container in (
             context.get('name'),
@@ -142,26 +155,21 @@ def _extract_firefox_cookies(profile, container, logger):
         if not isinstance(container_id, int):
             raise ValueError(f'could not find firefox container "{container}" in containers.json')
 
-    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
-    if cookie_database_path is None:
-        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
-    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
-
     with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
         cursor = None
         try:
             cursor = _open_database_copy(cookie_database_path, tmpdir)
-            origin_attributes = ''
             if isinstance(container_id, int):
-                origin_attributes = f'^userContextId={container_id}'
                 logger.debug(
                     f'Only loading cookies from firefox container "{container}", ID {container_id}')
-            try:
                 cursor.execute(
-                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes=?',
-                    (origin_attributes, ))
-            except sqlite3.OperationalError:
-                logger.debug('Database exception, loading all cookies')
+                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
+                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
+            elif container == 'none':
+                logger.debug('Only loading cookies not belonging to any container')
+                cursor.execute(
+                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")')
+            else:
                 cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
             jar = YoutubeDLCookieJar()
             with _create_progress_bar(logger) as progress_bar:
@@ -837,12 +845,15 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
 def _get_mac_keyring_password(browser_keyring_name, logger):
     logger.debug('using find-generic-password to obtain password from OSX keychain')
     try:
-        stdout, _, _ = Popen.run(
+        stdout, _, returncode = Popen.run(
             ['security', 'find-generic-password',
              '-w',  # write password to stdout
              '-a', browser_keyring_name,  # match 'account'
              '-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
             stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+        if returncode:
+            logger.warning('find-generic-password failed')
+            return None
         return stdout.rstrip(b'\n')
     except Exception as e:
         logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
@@ -980,6 +991,101 @@ def _parse_browser_specification(browser_name, profile=None, keyring=None, conta
         raise ValueError(f'unsupported browser: "{browser_name}"')
     if keyring not in (None, *SUPPORTED_KEYRINGS):
         raise ValueError(f'unsupported keyring: "{keyring}"')
-    if profile is not None and _is_path(profile):
-        profile = os.path.expanduser(profile)
+    if profile is not None and _is_path(expand_path(profile)):
+        profile = expand_path(profile)
     return browser_name, profile, keyring, container
+
+
+class LenientSimpleCookie(http.cookies.SimpleCookie):
+    """More lenient version of http.cookies.SimpleCookie
+
+    `load` skips unknown '$' attributes and illegal cookie keys rather
+    than raising `http.cookies.CookieError` and aborting the whole parse.
+    """
+    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
+    _LEGAL_KEY_CHARS = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
+    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + r"\[\]"
+
+    _RESERVED = {
+        "expires", "path", "comment", "domain", "max-age",
+        "secure", "httponly", "version", "samesite",
+    }
+
+    _FLAGS = {"secure", "httponly"}
+
+    # Added 'bad' group to catch the remaining value
+    _COOKIE_PATTERN = re.compile(r"""
+        \s*                            # Optional whitespace at start of cookie
+        (?P<key>                       # Start of group 'key'
+        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
+        )                              # End of group 'key'
+        (                              # Optional group: there may not be a value.
+        \s*=\s*                          # Equal Sign
+        (                                # Start of potential value
+        (?P<val>                           # Start of group 'val'
+        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
+        |                                    # or
+        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
+        |                                    # or
+        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
+        )                                  # End of group 'val'
+        |                                  # or
+        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
+        )                                # End of potential value
+        )?                             # End of optional value group
+        \s*                            # Any number of spaces.
+        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
+        """, re.ASCII | re.VERBOSE)
+
+    def load(self, data):
+        """Parse a cookie header string, ignoring the parts that are invalid.
+
+        Non-str `data` is handed to `SimpleCookie.load` unchanged.
+        """
+        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
+        if not isinstance(data, str):
+            return super().load(data)
+
+        morsel = None
+        for match in self._COOKIE_PATTERN.finditer(data):
+            if match.group("bad"):
+                morsel = None
+                continue
+
+            key, value = match.group("key", "val")
+
+            # A '$'-prefixed key can only be an attribute of the current cookie
+            is_attribute = key.startswith("$")
+            if is_attribute:
+                key = key[1:]
+
+            lower_key = key.lower()
+            if lower_key in self._RESERVED:
+                if morsel is None:
+                    continue
+
+                if value is None:
+                    if lower_key not in self._FLAGS:
+                        morsel = None
+                        continue
+                    value = True
+                else:
+                    value, _ = self.value_decode(value)
+
+                morsel[key] = value
+
+            elif is_attribute or value is None:
+                # Unknown '$' attribute or a key without a value: not a cookie
+                morsel = None
+
+            else:
+                cookie = self.get(key, http.cookies.Morsel())
+                real_value, coded_value = self.value_decode(value)
+                try:
+                    cookie.set(key, real_value, coded_value)
+                except http.cookies.CookieError:
+                    # Key is not legal for a Morsel: drop it and keep parsing
+                    morsel = None
+                else:
+                    morsel = cookie
+                    self[key] = morsel