# yt_dlp/networking/_requests.py
# [rh:requests] Update to `requests` 2.32.0 (#9980)
8a8b5452 1import contextlib
2import functools
3import http.client
4import logging
5import re
6import socket
7import warnings
8
9from ..dependencies import brotli, requests, urllib3
10from ..utils import bug_reports_message, int_or_none, variadic
35f4f764 11from ..utils.networking import normalize_url
8a8b5452 12
if requests is None:
    raise ImportError('requests module is not installed')

if urllib3 is None:
    raise ImportError('urllib3 module is not installed')

# Normalize the version string into a comparable tuple; non-numeric
# components (e.g. pre-release suffixes) count as 0
urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.'))

if urllib3_version < (1, 26, 17):
    raise ImportError('Only urllib3 >= 1.26.17 is supported')

# requests packs its version into __build__ as hex digits (2.32.0 -> 0x023200)
if requests.__build__ < 0x023200:
    raise ImportError('Only requests >= 2.32.0 is supported')
8a8b5452 26
27import requests.adapters
28import requests.utils
29import urllib3.connection
30import urllib3.exceptions
31
32from ._helper import (
33 InstanceStoreMixin,
34 add_accept_encoding_header,
35 create_connection,
36 create_socks_proxy_socket,
37 get_redirect_method,
38 make_socks_proxy_opts,
39 select_proxy,
40)
41from .common import (
42 Features,
43 RequestHandler,
44 Response,
45 register_preference,
46 register_rh,
47)
48from .exceptions import (
49 CertificateVerifyError,
50 HTTPError,
51 IncompleteRead,
52 ProxyError,
53 RequestError,
54 SSLError,
55 TransportError,
56)
57from ..socks import ProxyError as SocksProxyError
58
# Content-Encodings this handler can transparently decode
SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]

# Brotli support is optional (provided by ..dependencies)
if brotli is not None:
    SUPPORTED_ENCODINGS.append('br')

"""
Override urllib3's behavior to not convert lower-case percent-encoded characters
to upper-case during url normalization process.

RFC3986 defines that the lower or upper case percent-encoded hexadecimal characters are equivalent
and normalizers should convert them to uppercase for consistency [1].

However, some sites may have an incorrect implementation where they provide
a percent-encoded url that is then compared case-sensitively.[2]

While this is a very rare case, since urllib does not do this normalization step, it
is best to avoid it in requests too for compatibility reasons.

1: https://tools.ietf.org/html/rfc3986#section-2.1
2: https://github.com/streamlink/streamlink/pull/4003
"""
82
83
class Urllib3PercentREOverride:
    """Wrap a compiled pattern so that `subn` reports how many substitutions
    *would* have occurred but always returns the input string unchanged.

    This defeats urllib3's percent-encoding case normalization while keeping
    its internal match-count bookkeeping intact.
    """

    def __init__(self, r: re.Pattern):
        self.re = r

    def __getattr__(self, item):
        # Delegate every other attribute lookup to the wrapped pattern
        return getattr(self.re, item)

    def subn(self, repl, string, *args, **kwargs):
        # Run the real substitution only to obtain the count; discard the result
        _, count = self.re.subn(repl, string, *args, **kwargs)
        return string, count
94
95
# urllib3 >= 1.25.8 performs the normalization via PERCENT_RE.subn:
# https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0
import urllib3.util.url  # noqa: E305

# Swap in our no-op subn wrapper; the attribute was renamed in urllib3 2.0
if hasattr(urllib3.util.url, 'PERCENT_RE'):
    urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE)
elif hasattr(urllib3.util.url, '_PERCENT_RE'):  # urllib3 >= 2.0.0
    urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
else:
    warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())
106
"""
Workaround for an issue in urllib3.util.ssl_: ssl_wrap_socket does not pass
server_hostname to SSLContext.wrap_socket if server_hostname is an IP,
however this is an issue because we set check_hostname to True in our SSLContext.

Monkey-patching IS_SECURETRANSPORT forces it to pass server_hostname regardless.

This has been fixed in urllib3 2.0+.
See: https://github.com/urllib3/urllib3/issues/517
"""

if urllib3_version < (2, 0, 0):
    # Best-effort: the attribute layout differs between urllib3 releases
    with contextlib.suppress(Exception):
        urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True
121
122
# Requests will not automatically handle no_proxy by default
# due to buggy no_proxy handling with proxy dict [1].
# 1. https://github.com/psf/requests/issues/5000
# Substitute our own proxy selection helper so NO_PROXY is honoured.
requests.adapters.select_proxy = select_proxy
127
128
class RequestsResponseAdapter(Response):
    """Adapt a `requests.models.Response` to the networking framework's
    `Response` interface, translating urllib3 read-time errors into this
    package's exception hierarchy."""

    def __init__(self, res: requests.models.Response):
        super().__init__(
            fp=res.raw, headers=res.headers, url=res.url,
            status=res.status_code, reason=res.reason)

        # Keep a reference to the underlying requests response
        self._requests_response = res

    def read(self, amt: int = None):
        """Read up to `amt` bytes of decoded content (all remaining if None).

        Raises:
            SSLError: on TLS failures during the read
            IncompleteRead: when the stream ends before the announced length
            TransportError: for any other urllib3-level failure
        """
        try:
            # Interact with urllib3 response directly.
            return self.fp.read(amt, decode_content=True)

        # See urllib3.response.HTTPResponse.read() for exceptions raised on read
        except urllib3.exceptions.SSLError as e:
            raise SSLError(cause=e) from e

        except urllib3.exceptions.ProtocolError as e:
            # IncompleteRead is always contained within ProtocolError
            # See urllib3.response.HTTPResponse._error_catcher()
            ir_err = next(
                (err for err in (e.__context__, e.__cause__, *variadic(e.args))
                 if isinstance(err, http.client.IncompleteRead)), None)
            if ir_err is not None:
                # `urllib3.exceptions.IncompleteRead` is subclass of `http.client.IncompleteRead`
                # but uses an `int` for its `partial` property.
                partial = ir_err.partial if isinstance(ir_err.partial, int) else len(ir_err.partial)
                raise IncompleteRead(partial=partial, expected=ir_err.expected) from e
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # catch-all for any other urllib3 response exceptions
            raise TransportError(cause=e) from e
162
163
class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
    """HTTPAdapter that threads our SSLContext and source address into the
    underlying urllib3 pool and proxy managers."""

    def __init__(self, ssl_context=None, proxy_ssl_context=None, source_address=None, **kwargs):
        pm_args = {}
        if ssl_context:
            pm_args['ssl_context'] = ssl_context
        if source_address:
            pm_args['source_address'] = (source_address, 0)
        self._pm_args = pm_args
        # Fall back to the main TLS context for proxies when no dedicated one is given
        self._proxy_ssl_context = proxy_ssl_context or ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, *args, **kwargs):
        return super().init_poolmanager(*args, **kwargs, **self._pm_args)

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        extra_kwargs = {}
        # SOCKS proxies take no TLS context here (they are handled separately)
        if self._proxy_ssl_context and not proxy.lower().startswith('socks'):
            extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
        return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)

    def cert_verify(*args, **kwargs):
        # No-op: certificate verification is governed entirely by our SSLContext
        pass

    def _get_connection(self, request, *_, proxies=None, **__):
        # Bypass requests' per-request cert handling; our SSLContext already
        # controls verification
        return self.get_connection(request.url, proxies)
190
8a8b5452 191
class RequestsSession(requests.sessions.Session):
    """
    Ensure unified redirect method handling with our urllib redirect handler.
    """

    def rebuild_method(self, prepared_request, response):
        method = get_redirect_method(prepared_request.method, response.status_code)

        # HACK: requests strips headers/body on redirect unless the status was
        # 307/308; pretend it was 308 when the method is being preserved.
        if method == prepared_request.method:
            response._real_status_code = response.status_code
            response.status_code = 308

        prepared_request.method = method

        # Requests fails to resolve dot segments on absolute redirect locations
        # See: https://github.com/yt-dlp/yt-dlp/issues/9020
        prepared_request.url = normalize_url(prepared_request.url)

    def rebuild_auth(self, prepared_request, response):
        # HACK: restore the status code saved by rebuild_method, if any.
        # rebuild_auth runs after requests' header/body-stripping decision.
        real_status = getattr(response, '_real_status_code', None)
        if real_status is not None:
            response.status_code = real_status
            del response._real_status_code
        return super().rebuild_auth(prepared_request, response)
218
219
class Urllib3LoggingFilter(logging.Filter):
    """Drop urllib3's per-request log line; HTTPConnection already prints
    the request when debug output is enabled."""

    def filter(self, record):
        # Reject only the connectionpool request-summary message template
        return record.msg != '%s://%s:%s "%s %s %s" %s %s'
227
228
class Urllib3LoggingHandler(logging.Handler):
    """Redirect urllib3 logs to our logger"""

    def __init__(self, logger, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Target logger; expected to expose .error() and .stdout()
        self._logger = logger

    def emit(self, record):
        try:
            message = self.format(record)
            # ERROR and above go to the error stream, everything else to stdout
            sink = self._logger.error if record.levelno >= logging.ERROR else self._logger.stdout
            sink(message)
        except Exception:
            # Mirror logging.Handler convention: never let logging raise
            self.handleError(record)
246
247
@register_rh
class RequestsRH(RequestHandler, InstanceStoreMixin):

    """Requests RequestHandler
    https://github.com/psf/requests
    """
    _SUPPORTED_URL_SCHEMES = ('http', 'https')
    _SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
    _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
    _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
    RH_NAME = 'requests'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Forward urllib3 debug messages to our logger
        logger = logging.getLogger('urllib3')
        self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
        self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
        self.__logging_handler.addFilter(Urllib3LoggingFilter())
        logger.addHandler(self.__logging_handler)
        # TODO: Use a logger filter to suppress pool reuse warning instead
        logger.setLevel(logging.ERROR)

        if self.verbose:
            # Setting this globally is not ideal, but is easier than hacking with urllib3.
            # It could technically be problematic for scripts embedding yt-dlp.
            # However, it is unlikely debug traffic is used in that context in a way this will cause problems.
            urllib3.connection.HTTPConnection.debuglevel = 1
            logger.setLevel(logging.DEBUG)
        # this is expected if we are using --no-check-certificate
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    def close(self):
        self._clear_instances()
        # Remove the logging handler that contains a reference to our logger
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
        logging.getLogger('urllib3').removeHandler(self.__logging_handler)

    def _check_extensions(self, extensions):
        # Pop the extensions this handler supports natively;
        # NOTE(review): presumably the base class rejects whatever remains — confirm
        super()._check_extensions(extensions)
        extensions.pop('cookiejar', None)
        extensions.pop('timeout', None)

    def _create_instance(self, cookiejar):
        """Build a `RequestsSession` bound to `cookiejar`, with our TLS
        settings, retry policy and redirect handling installed."""
        session = RequestsSession()
        http_adapter = RequestsHTTPAdapter(
            ssl_context=self._make_sslcontext(),
            source_address=self.source_address,
            # Retry(False): disable urllib3's internal retries entirely
            max_retries=urllib3.util.retry.Retry(False),
        )
        session.adapters.clear()
        session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})
        session.mount('https://', http_adapter)
        session.mount('http://', http_adapter)
        session.cookies = cookiejar
        session.trust_env = False  # no need, we already load proxies from env
        return session

    def _send(self, request):
        """Send `request` through a pooled session.

        Returns a `RequestsResponseAdapter`. Raises `RequestError`
        subclasses for transport/TLS/proxy failures and `HTTPError`
        for non-2xx responses.
        """
        headers = self._merge_headers(request.headers)
        add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)

        max_redirects_exceeded = False

        session = self._get_instance(cookiejar=self._get_cookiejar(request))

        try:
            requests_res = session.request(
                method=request.method,
                url=request.url,
                data=request.data,
                headers=headers,
                timeout=self._calculate_timeout(request),
                proxies=self._get_proxies(request),
                allow_redirects=True,
                stream=True
            )

        except requests.exceptions.TooManyRedirects as e:
            # Keep the last response so an HTTPError with redirect_loop is raised below
            max_redirects_exceeded = True
            requests_res = e.response

        except requests.exceptions.SSLError as e:
            if 'CERTIFICATE_VERIFY_FAILED' in str(e):
                raise CertificateVerifyError(cause=e) from e
            raise SSLError(cause=e) from e

        except requests.exceptions.ProxyError as e:
            raise ProxyError(cause=e) from e

        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # Catch any urllib3 exceptions that may leak through
            raise TransportError(cause=e) from e

        except requests.exceptions.RequestException as e:
            # Miscellaneous Requests exceptions. May not necessary be network related e.g. InvalidURL
            raise RequestError(cause=e) from e

        res = RequestsResponseAdapter(requests_res)

        if not 200 <= res.status < 300:
            raise HTTPError(res, redirect_loop=max_redirects_exceeded)

        return res
357
358
@register_preference(RequestsRH)
def requests_preference(rh, request):
    # Preference score for this handler; NOTE(review): presumably higher
    # scores win over other registered handlers — see register_preference
    return 100
362
363
# Use our socks proxy implementation with requests to avoid an extra dependency.
class SocksHTTPConnection(urllib3.connection.HTTPConnection):
    """HTTPConnection whose socket is established through a SOCKS proxy."""

    def __init__(self, _socks_options, *args, **kwargs):  # must use _socks_options to pass PoolKey checks
        self._proxy_args = _socks_options
        super().__init__(*args, **kwargs)

    def _new_conn(self):
        # Open the socket to the proxy and tunnel to (self.host, self.port),
        # mapping our errors onto the urllib3 exceptions the pool expects
        try:
            return create_connection(
                address=(self._proxy_args['addr'], self._proxy_args['port']),
                timeout=self.timeout,
                source_address=self.source_address,
                _create_socket_func=functools.partial(
                    create_socks_proxy_socket, (self.host, self.port), self._proxy_args))
        except (socket.timeout, TimeoutError) as e:
            raise urllib3.exceptions.ConnectTimeoutError(
                self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
        except SocksProxyError as e:
            raise urllib3.exceptions.ProxyError(str(e), e) from e
        except OSError as e:
            raise urllib3.exceptions.NewConnectionError(
                self, f'Failed to establish a new connection: {e}') from e
386
387
class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):
    # SocksHTTPConnection supplies the proxied socket; HTTPSConnection
    # provides the TLS behaviour on top of it
    pass


class SocksHTTPConnectionPool(urllib3.HTTPConnectionPool):
    # Pool producing SOCKS-tunnelled plain-HTTP connections
    ConnectionCls = SocksHTTPConnection


class SocksHTTPSConnectionPool(urllib3.HTTPSConnectionPool):
    # Pool producing SOCKS-tunnelled HTTPS connections
    ConnectionCls = SocksHTTPSConnection
398
399
class SocksProxyManager(urllib3.PoolManager):
    """PoolManager that routes all connections through a SOCKS proxy
    using the in-house SOCKS socket implementation."""

    # username/password are accepted for signature compatibility with the
    # SOCKSProxyManager interface this class replaces; credentials are
    # parsed out of socks_proxy by make_socks_proxy_opts
    def __init__(self, socks_proxy, username=None, password=None, num_pools=10, headers=None, **connection_pool_kw):
        # Passed via connection_pool_kw so it participates in the pool key
        connection_pool_kw['_socks_options'] = make_socks_proxy_opts(socks_proxy)
        super().__init__(num_pools, headers, **connection_pool_kw)
        self.pool_classes_by_scheme = {
            'http': SocksHTTPConnectionPool,
            'https': SocksHTTPSConnectionPool
        }
409
410
# Route requests' SOCKS proxy support through our implementation
# (avoids the extra dependency its bundled SOCKS support requires)
requests.adapters.SOCKSProxyManager = SocksProxyManager