]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/cookies.py
[cookies] Parse cookies leniently (#4780)
[yt-dlp.git] / yt_dlp / cookies.py
index c3b14f03bb670e9bc55470bffecccfcdad175c8f..d502e91da6cea38ef4b752c99509ff2d6f1cc1b1 100644 (file)
@@ -1,6 +1,7 @@
 import base64
 import contextlib
 import http.cookiejar
+import http.cookies
 import json
 import os
 import re
@@ -990,3 +991,98 @@ def _parse_browser_specification(browser_name, profile=None, keyring=None, conta
     if profile is not None and _is_path(profile):
         profile = os.path.expanduser(profile)
     return browser_name, profile, keyring, container
+
+
+class LenientSimpleCookie(http.cookies.SimpleCookie):
+    """More lenient version of http.cookies.SimpleCookie"""
+    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
+    _LEGAL_KEY_CHARS = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
+    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + r"\[\]"
+
+    _RESERVED = {
+        "expires",
+        "path",
+        "comment",
+        "domain",
+        "max-age",
+        "secure",
+        "httponly",
+        "version",
+        "samesite",
+    }
+
+    _FLAGS = {"secure", "httponly"}
+
+    # Added 'bad' group to catch the remaining value
+    _COOKIE_PATTERN = re.compile(r"""
+        \s*                            # Optional whitespace at start of cookie
+        (?P<key>                       # Start of group 'key'
+        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
+        )                              # End of group 'key'
+        (                              # Optional group: there may not be a value.
+        \s*=\s*                          # Equal Sign
+        (                                # Start of potential value
+        (?P<val>                           # Start of group 'val'
+        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
+        |                                    # or
+        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
+        |                                    # or
+        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
+        )                                  # End of group 'val'
+        |                                  # or
+        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
+        )                                # End of potential value
+        )?                             # End of optional value group
+        \s*                            # Any number of spaces.
+        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
+        """, re.ASCII | re.VERBOSE)
+
+    def load(self, data):
+        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
+        if not isinstance(data, str):
+            return super().load(data)
+
+        morsel = None
+        index = 0
+        length = len(data)
+
+        while 0 <= index < length:
+            match = self._COOKIE_PATTERN.search(data, index)
+            if not match:
+                break
+
+            index = match.end(0)
+            if match.group("bad"):
+                morsel = None
+                continue
+
+            key, value = match.group("key", "val")
+
+            if key[0] == "$":
+                if morsel is not None:
+                    morsel[key[1:]] = True
+                continue
+
+            lower_key = key.lower()
+            if lower_key in self._RESERVED:
+                if morsel is None:
+                    continue
+
+                if value is None:
+                    if lower_key not in self._FLAGS:
+                        morsel = None
+                        continue
+                    value = True
+                else:
+                    value, _ = self.value_decode(value)
+
+                morsel[key] = value
+
+            elif value is not None:
+                morsel = self.get(key, http.cookies.Morsel())
+                real_value, coded_value = self.value_decode(value)
+                morsel.set(key, real_value, coded_value)
+                self[key] = morsel
+
+            else:
+                morsel = None