X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/8a8b54523addf46dfd50ef599761a81bc22362e6..c36513f1be2ef3d3cec864accbffda1afaa06ffd:/yt_dlp/networking/_requests.py diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index 27974357a..75eee8824 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -8,6 +8,7 @@ from ..dependencies import brotli, requests, urllib3 from ..utils import bug_reports_message, int_or_none, variadic +from ..utils.networking import normalize_url if requests is None: raise ImportError('requests module is not installed') @@ -20,8 +21,8 @@ if urllib3_version < (1, 26, 17): raise ImportError('Only urllib3 >= 1.26.17 is supported') -if requests.__build__ < 0x023100: - raise ImportError('Only requests >= 2.31.0 is supported') +if requests.__build__ < 0x023200: + raise ImportError('Only requests >= 2.32.0 is supported') import requests.adapters import requests.utils @@ -115,7 +116,7 @@ def subn(self, repl, string, *args, **kwargs): """ if urllib3_version < (2, 0, 0): - with contextlib.suppress(): + with contextlib.suppress(Exception): urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True @@ -142,18 +143,17 @@ def read(self, amt: int = None): except urllib3.exceptions.SSLError as e: raise SSLError(cause=e) from e - except urllib3.exceptions.IncompleteRead as e: - # urllib3 IncompleteRead.partial is always an integer - raise IncompleteRead(partial=e.partial, expected=e.expected) from e - except urllib3.exceptions.ProtocolError as e: - # http.client.IncompleteRead may be contained within ProtocolError + # IncompleteRead is always contained within ProtocolError # See urllib3.response.HTTPResponse._error_catcher() ir_err = next( (err for err in (e.__context__, e.__cause__, *variadic(e.args)) if isinstance(err, http.client.IncompleteRead)), None) if ir_err is not None: - raise IncompleteRead(partial=len(ir_err.partial), expected=ir_err.expected) from e + # `urllib3.exceptions.IncompleteRead` is subclass of `http.client.IncompleteRead` + # but uses an `int` for its `partial` property. + partial = ir_err.partial if isinstance(ir_err.partial, int) else len(ir_err.partial) + raise IncompleteRead(partial=partial, expected=ir_err.expected) from e raise TransportError(cause=e) from e except urllib3.exceptions.HTTPError as e: @@ -181,14 +181,19 @@ def proxy_manager_for(self, proxy, **proxy_kwargs): return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs) def cert_verify(*args, **kwargs): - # lean on SSLContext for cert verification + # Lean on our SSLContext for cert verification pass + def _get_connection(self, request, *_, proxies=None, **__): + # Lean on our SSLContext for cert verification + return self.get_connection(request.url, proxies) + class RequestsSession(requests.sessions.Session): """ Ensure unified redirect method handling with our urllib redirect handler. """ + def rebuild_method(self, prepared_request, response): new_method = get_redirect_method(prepared_request.method, response.status_code) @@ -199,6 +204,10 @@ def rebuild_method(self, prepared_request, response): prepared_request.method = new_method + # Requests fails to resolve dot segments on absolute redirect locations + # See: https://github.com/yt-dlp/yt-dlp/issues/9020 + prepared_request.url = normalize_url(prepared_request.url) + def rebuild_auth(self, prepared_request, response): # HACK: undo status code change from rebuild_method, if applicable. # rebuild_auth runs after requests would remove headers/body based on status code @@ -219,6 +228,7 @@ def filter(self, record): class Urllib3LoggingHandler(logging.Handler): """Redirect urllib3 logs to our logger""" + def __init__(self, logger, *args, **kwargs): super().__init__(*args, **kwargs) self._logger = logger @@ -252,11 +262,12 @@ def __init__(self, *args, **kwargs): # Forward urllib3 debug messages to our logger logger = logging.getLogger('urllib3') - handler = Urllib3LoggingHandler(logger=self._logger) - handler.setFormatter(logging.Formatter('requests: %(message)s')) - handler.addFilter(Urllib3LoggingFilter()) - logger.addHandler(handler) - logger.setLevel(logging.WARNING) + self.__logging_handler = Urllib3LoggingHandler(logger=self._logger) + self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s')) + self.__logging_handler.addFilter(Urllib3LoggingFilter()) + logger.addHandler(self.__logging_handler) + # TODO: Use a logger filter to suppress pool reuse warning instead + logger.setLevel(logging.ERROR) if self.verbose: # Setting this globally is not ideal, but is easier than hacking with urllib3. @@ -269,6 +280,9 @@ def __init__(self, *args, **kwargs): def close(self): self._clear_instances() + # Remove the logging handler that contains a reference to our logger + # See: https://github.com/yt-dlp/yt-dlp/issues/8922 + logging.getLogger('urllib3').removeHandler(self.__logging_handler) def _check_extensions(self, extensions): super()._check_extensions(extensions) @@ -297,8 +311,7 @@ def _send(self, request): max_redirects_exceeded = False - session = self._get_instance( - cookiejar=request.extensions.get('cookiejar') or self.cookiejar) + session = self._get_instance(cookiejar=self._get_cookiejar(request)) try: requests_res = session.request( @@ -306,8 +319,8 @@ def _send(self, request): url=request.url, data=request.data, headers=headers, - timeout=float(request.extensions.get('timeout') or self.timeout), - proxies=request.proxies or self.proxies, + timeout=self._calculate_timeout(request), + proxies=self._get_proxies(request), allow_redirects=True, stream=True ) @@ -367,7 +380,7 @@ def _new_conn(self): self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e except SocksProxyError as e: raise urllib3.exceptions.ProxyError(str(e), e) from e - except (OSError, socket.error) as e: + except OSError as e: raise urllib3.exceptions.NewConnectionError( self, f'Failed to establish a new connection: {e}') from e