[core] Fix support for upcoming Python 3.12 (#8130)

[yt-dlp.git] / yt_dlp / networking / _urllib.py
diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py

index 2c5f09872afe6f45644d9d19a6c49e34f41d523f..3c0647ecf909b4e0bbd0d25ab6a1c52a85f11171 100644 (file)
--- a/yt_dlp/networking/_urllib.py
+++ b/yt_dlp/networking/_urllib.py
@@ -1,7 +1,6 @@
  from __future__ import annotations
  
  import functools
-import gzip
  import http.client
  import io
  import socket
@@ -28,7 +27,7 @@
      make_socks_proxy_opts,
      select_proxy,
  )
-from .common import Features, RequestHandler, Response, register
+from .common import Features, RequestHandler, Response, register_rh
  from .exceptions import (
      CertificateVerifyError,
      HTTPError,
@@ -41,7 +40,8 @@
  from ..dependencies import brotli
  from ..socks import ProxyError as SocksProxyError
  from ..socks import sockssocket
-from ..utils import escape_url, update_url_query
+from ..utils import update_url_query
+from ..utils.networking import normalize_url
  
  SUPPORTED_ENCODINGS = ['gzip', 'deflate']
  CONTENT_DECODE_ERRORS = [zlib.error, OSError]
@@ -154,20 +154,11 @@ def brotli(data):
  
      @staticmethod
      def gz(data):
-        gz = gzip.GzipFile(fileobj=io.BytesIO(data), mode='rb')
-        try:
-            return gz.read()
-        except OSError as original_oserror:
-            # There may be junk add the end of the file
-            # See http://stackoverflow.com/q/4928560/35070 for details
-            for i in range(1, 1024):
-                try:
-                    gz = gzip.GzipFile(fileobj=io.BytesIO(data[:-i]), mode='rb')
-                    return gz.read()
-                except OSError:
-                    continue
-            else:
-                raise original_oserror
+        # There may be junk added at the end of the file
+        # We ignore it by only ever decoding a single gzip payload
+        if not data:
+            return data
+        return zlib.decompress(data, wbits=zlib.MAX_WBITS | 16)
  
      def http_request(self, req):
          # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
@@ -179,7 +170,7 @@ def http_request(self, req):
          # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
          # the code of this workaround has been moved here from YoutubeDL.urlopen()
          url = req.get_full_url()
-        url_escaped = escape_url(url)
+        url_escaped = normalize_url(url)
  
          # Substitute URL if any change after escaping
          if url != url_escaped:
@@ -212,7 +203,7 @@ def http_response(self, req, resp):
              if location:
                  # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                  location = location.encode('iso-8859-1').decode()
-                location_escaped = escape_url(location)
+                location_escaped = normalize_url(location)
                  if location != location_escaped:
                      del resp.headers['Location']
                      resp.headers['Location'] = location_escaped
@@ -315,7 +306,7 @@ def get_method(self):
  def update_Request(req, url=None, data=None, headers=None, query=None):
      req_headers = req.headers.copy()
      req_headers.update(headers or {})
-    req_data = data or req.data
+    req_data = data if data is not None else req.data
      req_url = update_url_query(url or req.get_full_url(), query)
      req_get_method = req.get_method()
      if req_get_method == 'HEAD':
@@ -372,7 +363,7 @@ def handle_response_read_exceptions(e):
          raise TransportError(cause=e) from e
  
  
-@register
+@register_rh
  class UrllibRH(RequestHandler, InstanceStoreMixin):
      _SUPPORTED_URL_SCHEMES = ('http', 'https', 'data', 'ftp')
      _SUPPORTED_PROXY_SCHEMES = ('http', 'socks4', 'socks4a', 'socks5', 'socks5h')
@@ -385,6 +376,11 @@ def __init__(self, *, enable_file_urls: bool = False, **kwargs):
          if self.enable_file_urls:
              self._SUPPORTED_URL_SCHEMES = (*self._SUPPORTED_URL_SCHEMES, 'file')
  
+    def _check_extensions(self, extensions):
+        super()._check_extensions(extensions)
+        extensions.pop('cookiejar', None)
+        extensions.pop('timeout', None)
+
      def _create_instance(self, proxies, cookiejar):
          opener = urllib.request.OpenerDirector()
          handlers = [
@@ -433,7 +429,7 @@ def _send(self, request):
          except urllib.error.HTTPError as e:
              if isinstance(e.fp, (http.client.HTTPResponse, urllib.response.addinfourl)):
                  # Prevent file object from being closed when urllib.error.HTTPError is destroyed.
-                e._closer.file = None
+                e._closer.close_called = True
                  raise HTTPError(UrllibResponseAdapter(e.fp), redirect_loop='redirect error' in str(e)) from e
              raise  # unexpected
          except urllib.error.URLError as e: