[core] Change how `Cookie` headers are handled
[yt-dlp.git] / yt_dlp / YoutubeDL.py
index cf0122d4ba6526bbfade88cf916bb09559deb49f..7f557166694c7fec7686b6f1ed10596c5a49c82e 100644 (file)
@@ -1,9 +1,11 @@
 import collections
 import contextlib
+import copy
 import datetime
 import errno
 import fileinput
 import functools
+import http.cookiejar
 import io
 import itertools
 import json
@@ -25,7 +27,7 @@
 from .cache import Cache
 from .compat import urllib  # isort: split
 from .compat import compat_os_name, compat_shlex_quote
-from .cookies import load_cookies
+from .cookies import LenientSimpleCookie, load_cookies
 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
 from .downloader.rtmp import rtmpdump_version
 from .extractor import gen_extractor_classes, get_info_extractor
@@ -673,6 +675,9 @@ def process_color_policy(stream):
         if auto_init and auto_init != 'no_verbose_header':
             self.print_debug_header()
 
+        self.__header_cookies = []
+        self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False))  # compat
+
         def check_deprecated(param, option, suggestion):
             if self.params.get(param) is not None:
                 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
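
Note for embedders: the shim above intercepts a `Cookie` entry in the user-supplied `http_headers` at construction time. A minimal usage sketch, assuming a build that includes this change (the cookie value is made up):

    import yt_dlp

    # Deprecated path: a global Cookie header. With this change it is parsed by
    # _load_cookies() at init time and later pinned to each downloaded URL's
    # domain by _apply_header_cookies(), instead of being sent with every request.
    with yt_dlp.YoutubeDL({'http_headers': {'Cookie': 'session=abc123'}}) as ydl:
        pass  # ydl.download([...]) would now use per-domain scoped cookies
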
@@ -1625,8 +1630,60 @@ def progress(msg):
                 self.to_screen('')
             raise
 
+    def _load_cookies(self, data, *, from_headers=True):
+        """Loads cookies from a `Cookie` header
+
+        This tries to work around the security vulnerability of passing cookies to every domain.
+        See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
+        The unscoped cookies are saved for later to be stored in the jar with a limited scope.
+
+        @param data         The Cookie header as a string to load the cookies from
+        @param from_headers If `False`, allow Set-Cookie syntax in the cookie string (at least a domain is required)
+        """
+        for cookie in LenientSimpleCookie(data).values():
+            if from_headers and any(cookie.values()):
+                raise ValueError('Invalid syntax in Cookie Header')
+
+            domain = cookie.get('domain') or ''
+            expiry = cookie.get('expires')
+            if expiry == '':  # 0 is valid
+                expiry = None
+            prepared_cookie = http.cookiejar.Cookie(
+                cookie.get('version') or 0, cookie.key, cookie.value, None, False,
+                domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
+                cookie.get('secure') or False, expiry, False, None, None, {})
+
+            if domain:
+                self.cookiejar.set_cookie(prepared_cookie)
+            elif from_headers:
+                self.deprecated_feature(
+                    'Passing cookies as a header is a potential security risk; '
+                    'they will be scoped to the domain of the downloaded urls. '
+                    'Please consider loading cookies from a file or browser instead.')
+                self.__header_cookies.append(prepared_cookie)
+            else:
+                self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
+                                  tb=False, is_error=False)
+
+    def _apply_header_cookies(self, url):
+        """Applies stray header cookies to the provided url
+
+        This loads header cookies and scopes them to the domain provided in `url`.
+        While this is not ideal, it helps reduce the risk of them being sent
+        to an unintended destination while mostly maintaining compatibility.
+        """
+        parsed = urllib.parse.urlparse(url)
+        if not parsed.hostname:
+            return
+
+        for cookie in map(copy.copy, self.__header_cookies):
+            cookie.domain = f'.{parsed.hostname}'
+            self.cookiejar.set_cookie(cookie)
+
     @_handle_extraction_exceptions
     def __extract_info(self, url, ie, download, extra_info, process):
+        self._apply_header_cookies(url)
+
         try:
             ie_result = ie.extract(url)
         except UserNotLive as e:
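
To illustrate the flow added above: an unscoped `Cookie` header is split into individual cookies, held aside in `__header_cookies`, and only written to the jar once a target URL is known, pinned to that URL's host. A standalone sketch of the same idea using the standard library's `SimpleCookie` in place of yt-dlp's `LenientSimpleCookie` (the helper names, cookie values and URL are illustrative, not part of the change):

    import copy
    import http.cookiejar
    import urllib.parse
    from http.cookies import SimpleCookie  # stand-in for LenientSimpleCookie

    def parse_header_cookies(header_value):
        # Split 'a=1; b=2' into unscoped http.cookiejar.Cookie objects,
        # mirroring the constructor call in _load_cookies above.
        return [http.cookiejar.Cookie(
            0, morsel.key, morsel.value, None, False,
            '', True, True, '', False, False, None, False, None, None, {})
            for morsel in SimpleCookie(header_value).values()]

    def scope_to_url(jar, header_cookies, url):
        # Copy each stray cookie and pin it to the URL's hostname before
        # storing it, as _apply_header_cookies does.
        hostname = urllib.parse.urlparse(url).hostname
        if not hostname:
            return
        for cookie in map(copy.copy, header_cookies):
            cookie.domain = f'.{hostname}'
            jar.set_cookie(cookie)

    jar = http.cookiejar.CookieJar()
    stray = parse_header_cookies('session=abc123; theme=dark')
    scope_to_url(jar, stray, 'https://example.com/watch?v=xyz')
    print(sorted((c.name, c.domain) for c in jar))
    # [('session', '.example.com'), ('theme', '.example.com')]
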
@@ -2414,9 +2471,24 @@ def _calc_headers(self, info_dict):
         if 'Youtubedl-No-Compression' in res:  # deprecated
             res.pop('Youtubedl-No-Compression', None)
             res['Accept-Encoding'] = 'identity'
-        cookies = self.cookiejar.get_cookie_header(info_dict['url'])
+        cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
         if cookies:
-            res['Cookie'] = cookies
+            encoder = LenientSimpleCookie()
+            values = []
+            for cookie in cookies:
+                _, value = encoder.value_encode(cookie.value)
+                values.append(f'{cookie.name}={value}')
+                if cookie.domain:
+                    values.append(f'Domain={cookie.domain}')
+                if cookie.path:
+                    values.append(f'Path={cookie.path}')
+                if cookie.secure:
+                    values.append('Secure')
+                if cookie.expires:
+                    values.append(f'Expires={cookie.expires}')
+                if cookie.version:
+                    values.append(f'Version={cookie.version}')
+            info_dict['cookies'] = '; '.join(values)
 
         if 'X-Forwarded-For' not in res:
             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
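
Two details of the serialisation above are worth spelling out. `encoder.value_encode` keeps a cookie value containing ';', '"' or whitespace from corrupting the joined string by quoting and escaping it; since `LenientSimpleCookie` is a `SimpleCookie` subclass, the standard library illustrates the same encoding (the values below are made up):

    from http.cookies import SimpleCookie  # same value encoding as LenientSimpleCookie

    encoder = SimpleCookie()
    for raw in ('abc123', 'a value; with specials'):
        _, encoded = encoder.value_encode(raw)
        print(f'session={encoded}')
    # session=abc123
    # session="a value\073 with specials"

And the joined result is deliberately Set-Cookie-style, with Domain/Path/Secure/Expires/Version attributes, rather than a bare `Cookie` header: storing it in `info_dict['cookies']` keeps the scoping information when the info JSON is written out and reloaded.
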
@@ -3423,6 +3495,8 @@ def download_with_info_file(self, info_filename):
             infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
                      for info in variadic(json.loads('\n'.join(f)))]
         for info in infos:
+            self._load_cookies(info.get('cookies'), from_headers=False)
+            self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False))  # compat
             try:
                 self.__download_wrapper(self.process_ie_result)(info, download=True)
             except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
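
This completes the round trip for `--load-info-json`: the Set-Cookie-style string stored in `info['cookies']` is fed back through `_load_cookies(..., from_headers=False)`, which accepts the attributes and so restores each cookie with its original scope, while a legacy `Cookie` entry in `http_headers` still goes through the (deprecated) header path. A small sketch of why the attribute syntax matters, again using `SimpleCookie` as a stand-in for `LenientSimpleCookie` (the stored value is made up):

    from http.cookies import SimpleCookie  # stand-in for LenientSimpleCookie

    stored = 'session=abc123; Domain=.example.com; Path=/; Secure'
    for morsel in SimpleCookie(stored).values():
        # any(morsel.values()) is truthy because Set-Cookie attributes are present.
        # _load_cookies rejects exactly this when from_headers=True, since a plain
        # Cookie header must not carry attributes.
        print(morsel.key, morsel.value, morsel['domain'], morsel['path'], any(morsel.values()))
    # session abc123 .example.com / True
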