]> jfr.im git - yt-dlp.git/blame - yt_dlp/networking/_requests.py
[fd/hls] Apply `extra_param_to_key_url` from info dict
[yt-dlp.git] / yt_dlp / networking / _requests.py
CommitLineData
add96eb9 1from __future__ import annotations
2
8a8b5452 3import contextlib
4import functools
5import http.client
6import logging
7import re
8import socket
9import warnings
10
11from ..dependencies import brotli, requests, urllib3
12from ..utils import bug_reports_message, int_or_none, variadic
35f4f764 13from ..utils.networking import normalize_url
8a8b5452 14
# Fail fast at import time if the required dependencies are missing or too old,
# so the handler is simply not registered rather than breaking at request time.
if requests is None:
    raise ImportError('requests module is not installed')

if urllib3 is None:
    raise ImportError('urllib3 module is not installed')

# Parse e.g. '1.26.17' into (1, 26, 17); non-numeric components become 0
urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.'))

if urllib3_version < (1, 26, 17):
    raise ImportError('Only urllib3 >= 1.26.17 is supported')

# requests.__build__ encodes the version as hex: 0x023202 == 2.32.2
if requests.__build__ < 0x023202:
    raise ImportError('Only requests >= 2.32.2 is supported')
8a8b5452 28
29import requests.adapters
30import requests.utils
31import urllib3.connection
32import urllib3.exceptions
3f799953 33import urllib3.util
8a8b5452 34
35from ._helper import (
36 InstanceStoreMixin,
37 add_accept_encoding_header,
38 create_connection,
39 create_socks_proxy_socket,
40 get_redirect_method,
41 make_socks_proxy_opts,
42 select_proxy,
43)
44from .common import (
45 Features,
46 RequestHandler,
47 Response,
48 register_preference,
49 register_rh,
50)
51from .exceptions import (
52 CertificateVerifyError,
53 HTTPError,
54 IncompleteRead,
55 ProxyError,
56 RequestError,
57 SSLError,
58 TransportError,
59)
60from ..socks import ProxyError as SocksProxyError
61
# Content encodings advertised via the Accept-Encoding header;
# decoding itself is performed by urllib3 (decode_content=True on read)
SUPPORTED_ENCODINGS = [
    'gzip', 'deflate',
]

# Advertise Brotli only when the optional brotli/brotlicffi dependency is present
if brotli is not None:
    SUPPORTED_ENCODINGS.append('br')
68
add96eb9 69'''
8a8b5452 70Override urllib3's behavior to not convert lower-case percent-encoded characters
71to upper-case during url normalization process.
72
RFC3986 defines that the lower or upper case percent-encoded hexadecimal characters are equivalent
74and normalizers should convert them to uppercase for consistency [1].
75
76However, some sites may have an incorrect implementation where they provide
77a percent-encoded url that is then compared case-sensitively.[2]
78
79While this is a very rare case, since urllib does not do this normalization step, it
is best to avoid it in requests too for compatibility reasons.
81
821: https://tools.ietf.org/html/rfc3986#section-2.1
832: https://github.com/streamlink/streamlink/pull/4003
add96eb9 84'''
8a8b5452 85
86
class Urllib3PercentREOverride:
    """Proxy around a compiled percent-encoding pattern whose ``subn`` reports
    the true match count but never rewrites the string, so lower-case
    percent-escapes survive urllib3's URL normalization."""

    def __init__(self, r: re.Pattern):
        self.re = r

    def __getattr__(self, item):
        # Anything we don't override is served by the wrapped pattern
        return self.re.__getattribute__(item)

    def subn(self, repl, string, *args, **kwargs):
        # urllib3 only inspects the count; hand back the input unmodified
        matches = self.re.subn(repl, string, *args, **kwargs)[1]
        return string, matches
97
98
# urllib3 >= 1.25.8 uses subn:
# https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0
import urllib3.util.url

# Swap urllib3's percent-encoding regex for our no-op wrapper; the attribute
# was renamed with a leading underscore in urllib3 2.0
if hasattr(urllib3.util.url, 'PERCENT_RE'):
    urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE)
elif hasattr(urllib3.util.url, '_PERCENT_RE'):  # urllib3 >= 2.0.0
    urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
else:
    # A future urllib3 release may rename/remove the attribute entirely
    warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())
109
add96eb9 110'''
8a8b5452 111Workaround for issue in urllib.util.ssl_.py: ssl_wrap_context does not pass
112server_hostname to SSLContext.wrap_socket if server_hostname is an IP,
113however this is an issue because we set check_hostname to True in our SSLContext.
114
115Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_context to pass server_hostname regardless.
116
117This has been fixed in urllib3 2.0+.
118See: https://github.com/urllib3/urllib3/issues/517
add96eb9 119'''
8a8b5452 120
if urllib3_version < (2, 0, 0):
    # Best effort: the attribute may not exist in every 1.x release
    with contextlib.suppress(Exception):
        urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True
124
125
# Requests will not automatically handle no_proxy by default
# due to buggy no_proxy handling with proxy dict [1].
# 1. https://github.com/psf/requests/issues/5000
# Replace it with our own proxy selection logic from _helper.
requests.adapters.select_proxy = select_proxy
130
131
class RequestsResponseAdapter(Response):
    """Adapt a `requests` response to the networking framework's `Response`,
    translating urllib3 errors raised during reads into our exception types."""

    def __init__(self, res: requests.models.Response):
        super().__init__(
            fp=res.raw, headers=res.headers, url=res.url,
            status=res.status_code, reason=res.reason)

        self._requests_response = res

    def read(self, amt: int | None = None):
        try:
            # Talk to the underlying urllib3 response so content decoding applies
            return self.fp.read(amt, decode_content=True)

        # See urllib3.response.HTTPResponse.read() for exceptions raised on read
        except urllib3.exceptions.SSLError as e:
            raise SSLError(cause=e) from e

        except urllib3.exceptions.ProtocolError as e:
            # IncompleteRead is always contained within ProtocolError
            # See urllib3.response.HTTPResponse._error_catcher()
            for candidate in (e.__context__, e.__cause__, *variadic(e.args)):
                if not isinstance(candidate, http.client.IncompleteRead):
                    continue
                # `urllib3.exceptions.IncompleteRead` is subclass of `http.client.IncompleteRead`
                # but uses an `int` for its `partial` property.
                if isinstance(candidate.partial, int):
                    partial = candidate.partial
                else:
                    partial = len(candidate.partial)
                raise IncompleteRead(partial=partial, expected=candidate.expected) from e
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # catch-all for any other urllib3 response exceptions
            raise TransportError(cause=e) from e
165
166
class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
    """HTTPAdapter that injects our SSLContext and source address into the
    urllib3 pool managers that requests creates."""

    def __init__(self, ssl_context=None, proxy_ssl_context=None, source_address=None, **kwargs):
        # Extra kwargs forwarded to every PoolManager/ProxyManager we create
        self._pm_args = {}
        if ssl_context:
            self._pm_args['ssl_context'] = ssl_context
        if source_address:
            # Port 0: let the OS choose an ephemeral local port
            self._pm_args['source_address'] = (source_address, 0)
        # Fall back to the regular SSLContext for proxies if none was given
        self._proxy_ssl_context = proxy_ssl_context or ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, *args, **kwargs):
        return super().init_poolmanager(*args, **kwargs, **self._pm_args)

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        extra_kwargs = {}
        # proxy_ssl_context only applies to HTTP(S) proxies, not SOCKS
        if not proxy.lower().startswith('socks') and self._proxy_ssl_context:
            extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
        return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)

    # Skip `requests` internal verification; we use our own SSLContext
    # NOTE: intentionally no explicit `self` — called with varying signatures;
    # all arguments are ignored
    def cert_verify(*args, **kwargs):
        pass

    # requests 2.32.2+: Reimplementation without `_urllib3_request_context`
    def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None):
        # `verify` and `cert` are ignored; TLS config comes from our SSLContext
        url = urllib3.util.parse_url(request.url).url

        manager = self.poolmanager
        if proxy := select_proxy(url, proxies):
            manager = self.proxy_manager_for(proxy)

        return manager.connection_from_url(url)
199
8a8b5452 200
class RequestsSession(requests.sessions.Session):
    """
    Ensure unified redirect method handling with our urllib redirect handler.
    """

    def rebuild_method(self, prepared_request, response):
        new_method = get_redirect_method(prepared_request.method, response.status_code)

        # HACK: requests removes headers/body on redirect unless code was a 307/308.
        # Stash the real status and fake a 308 so they are kept; the real value
        # is restored in rebuild_auth() below, which runs after the removal step.
        if new_method == prepared_request.method:
            response._real_status_code = response.status_code
            response.status_code = 308

        prepared_request.method = new_method

        # Requests fails to resolve dot segments on absolute redirect locations
        # See: https://github.com/yt-dlp/yt-dlp/issues/9020
        prepared_request.url = normalize_url(prepared_request.url)

    def rebuild_auth(self, prepared_request, response):
        # HACK: undo status code change from rebuild_method, if applicable.
        # rebuild_auth runs after requests would remove headers/body based on status code
        if hasattr(response, '_real_status_code'):
            response.status_code = response._real_status_code
            del response._real_status_code
        return super().rebuild_auth(prepared_request, response)
227
228
class Urllib3LoggingFilter(logging.Filter):
    """Drop urllib3's per-request log records (HTTPConnection prints those already)."""

    def filter(self, record):
        # urllib3's connection pool logs each request with exactly this template
        return record.msg != '%s://%s:%s "%s %s %s" %s %s'
236
237
class Urllib3LoggingHandler(logging.Handler):
    """Redirect urllib3 logs to our logger"""

    def __init__(self, logger, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._logger = logger

    def emit(self, record):
        try:
            message = self.format(record)
            # ERROR and above go to the error stream; everything else to stdout
            sink = self._logger.error if record.levelno >= logging.ERROR else self._logger.stdout
            sink(message)
        except Exception:
            self.handleError(record)
255
256
@register_rh
class RequestsRH(RequestHandler, InstanceStoreMixin):

    """Requests RequestHandler
    https://github.com/psf/requests
    """
    _SUPPORTED_URL_SCHEMES = ('http', 'https')
    _SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
    _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
    _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
    RH_NAME = 'requests'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Forward urllib3 debug messages to our logger
        logger = logging.getLogger('urllib3')
        self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
        self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
        self.__logging_handler.addFilter(Urllib3LoggingFilter())
        logger.addHandler(self.__logging_handler)
        # TODO: Use a logger filter to suppress pool reuse warning instead
        logger.setLevel(logging.ERROR)

        if self.verbose:
            # Setting this globally is not ideal, but is easier than hacking with urllib3.
            # It could technically be problematic for scripts embedding yt-dlp.
            # However, it is unlikely debug traffic is used in that context in a way this will cause problems.
            urllib3.connection.HTTPConnection.debuglevel = 1
            logger.setLevel(logging.DEBUG)
        # this is expected if we are using --no-check-certificate
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    def close(self):
        self._clear_instances()
        # Remove the logging handler that contains a reference to our logger
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
        logging.getLogger('urllib3').removeHandler(self.__logging_handler)

    def _check_extensions(self, extensions):
        super()._check_extensions(extensions)
        # Remove the extensions this handler supports; they are consumed in _send()
        extensions.pop('cookiejar', None)
        extensions.pop('timeout', None)

    def _create_instance(self, cookiejar):
        # One session per cookiejar, cached by InstanceStoreMixin
        session = RequestsSession()
        http_adapter = RequestsHTTPAdapter(
            ssl_context=self._make_sslcontext(),
            source_address=self.source_address,
            # Retry(False): disable urllib3's automatic retries entirely
            max_retries=urllib3.util.retry.Retry(False),
        )
        session.adapters.clear()
        # Drop requests' default headers; ours are merged in per request
        session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})
        session.mount('https://', http_adapter)
        session.mount('http://', http_adapter)
        session.cookies = cookiejar
        session.trust_env = False  # no need, we already load proxies from env
        return session

    def _send(self, request):

        headers = self._merge_headers(request.headers)
        add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)

        max_redirects_exceeded = False

        session = self._get_instance(cookiejar=self._get_cookiejar(request))

        try:
            requests_res = session.request(
                method=request.method,
                url=request.url,
                data=request.data,
                headers=headers,
                timeout=self._calculate_timeout(request),
                proxies=self._get_proxies(request),
                allow_redirects=True,
                # Stream the body; it is consumed lazily via RequestsResponseAdapter
                stream=True,
            )

        except requests.exceptions.TooManyRedirects as e:
            # Still surface the final response, flagged as a redirect loop below
            max_redirects_exceeded = True
            requests_res = e.response

        except requests.exceptions.SSLError as e:
            if 'CERTIFICATE_VERIFY_FAILED' in str(e):
                raise CertificateVerifyError(cause=e) from e
            raise SSLError(cause=e) from e

        except requests.exceptions.ProxyError as e:
            raise ProxyError(cause=e) from e

        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # Catch any urllib3 exceptions that may leak through
            raise TransportError(cause=e) from e

        except requests.exceptions.RequestException as e:
            # Miscellaneous Requests exceptions. May not necessary be network related e.g. InvalidURL
            raise RequestError(cause=e) from e

        res = RequestsResponseAdapter(requests_res)

        # Non-2xx is reported as an HTTPError, like the other request handlers
        if not 200 <= res.status < 300:
            raise HTTPError(res, redirect_loop=max_redirects_exceeded)

        return res
366
367
@register_preference(RequestsRH)
def requests_preference(rh, request):
    # Boost this handler so it is chosen ahead of lower-preference handlers
    return 100
371
372
373# Use our socks proxy implementation with requests to avoid an extra dependency.
class SocksHTTPConnection(urllib3.connection.HTTPConnection):
    """HTTPConnection that establishes its socket through our SOCKS proxy code."""

    def __init__(self, _socks_options, *args, **kwargs):  # must use _socks_options to pass PoolKey checks
        self._proxy_args = _socks_options
        super().__init__(*args, **kwargs)

    def _new_conn(self):
        # Connect to the SOCKS proxy, then let create_socks_proxy_socket perform
        # the handshake targeting (self.host, self.port). Our errors are
        # translated into the urllib3 exceptions its pool machinery expects.
        try:
            return create_connection(
                address=(self._proxy_args['addr'], self._proxy_args['port']),
                timeout=self.timeout,
                source_address=self.source_address,
                _create_socket_func=functools.partial(
                    create_socks_proxy_socket, (self.host, self.port), self._proxy_args))
        except (socket.timeout, TimeoutError) as e:
            raise urllib3.exceptions.ConnectTimeoutError(
                self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
        except SocksProxyError as e:
            raise urllib3.exceptions.ProxyError(str(e), e) from e
        except OSError as e:
            raise urllib3.exceptions.NewConnectionError(
                self, f'Failed to establish a new connection: {e}') from e
395
396
class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):
    # MRO: SOCKS socket creation from SocksHTTPConnection, TLS from HTTPSConnection
    pass
399
400
class SocksHTTPConnectionPool(urllib3.HTTPConnectionPool):
    # Pool that hands out SOCKS-tunnelled plain HTTP connections
    ConnectionCls = SocksHTTPConnection
403
404
class SocksHTTPSConnectionPool(urllib3.HTTPSConnectionPool):
    # Pool that hands out SOCKS-tunnelled HTTPS connections
    ConnectionCls = SocksHTTPSConnection
407
408
class SocksProxyManager(urllib3.PoolManager):
    """PoolManager that routes every connection through a SOCKS proxy."""

    def __init__(self, socks_proxy, username=None, password=None, num_pools=10, headers=None, **connection_pool_kw):
        # username/password are accepted for signature compatibility with
        # urllib3's SOCKSProxyManager; presumably credentials are extracted from
        # the proxy URL by make_socks_proxy_opts() — confirm against _helper
        connection_pool_kw['_socks_options'] = make_socks_proxy_opts(socks_proxy)
        super().__init__(num_pools, headers, **connection_pool_kw)
        self.pool_classes_by_scheme = {
            'http': SocksHTTPConnectionPool,
            'https': SocksHTTPSConnectionPool,
        }
418
419
# Use our SOCKS implementation for requests' proxying (avoids the extra
# PySocks dependency that requests would otherwise need)
requests.adapters.SOCKSProxyManager = SocksProxyManager