]>
Commit | Line | Data |
---|---|---|
add96eb9 | 1 | from __future__ import annotations |
2 | ||
8a8b5452 | 3 | import contextlib |
4 | import functools | |
5 | import http.client | |
6 | import logging | |
7 | import re | |
8 | import socket | |
9 | import warnings | |
10 | ||
11 | from ..dependencies import brotli, requests, urllib3 | |
12 | from ..utils import bug_reports_message, int_or_none, variadic | |
35f4f764 | 13 | from ..utils.networking import normalize_url |
8a8b5452 | 14 | |
# Fail fast with a clear message when the optional dependencies are missing
# or too old; this module is only loaded when requests support is wanted.
if requests is None:
    raise ImportError('requests module is not installed')

if urllib3 is None:
    raise ImportError('urllib3 module is not installed')

# Parse "x.y.z" into a comparable tuple; non-numeric components become 0
urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.'))

if urllib3_version < (1, 26, 17):
    raise ImportError('Only urllib3 >= 1.26.17 is supported')

# requests.__build__ packs the version as 0xMMmmpp (e.g. 2.32.2 -> 0x023202)
if requests.__build__ < 0x023202:
    raise ImportError('Only requests >= 2.32.2 is supported')
8a8b5452 | 28 | |
29 | import requests.adapters | |
30 | import requests.utils | |
31 | import urllib3.connection | |
32 | import urllib3.exceptions | |
3f799953 | 33 | import urllib3.util |
8a8b5452 | 34 | |
35 | from ._helper import ( | |
36 | InstanceStoreMixin, | |
37 | add_accept_encoding_header, | |
38 | create_connection, | |
39 | create_socks_proxy_socket, | |
40 | get_redirect_method, | |
41 | make_socks_proxy_opts, | |
42 | select_proxy, | |
43 | ) | |
44 | from .common import ( | |
45 | Features, | |
46 | RequestHandler, | |
47 | Response, | |
48 | register_preference, | |
49 | register_rh, | |
50 | ) | |
51 | from .exceptions import ( | |
52 | CertificateVerifyError, | |
53 | HTTPError, | |
54 | IncompleteRead, | |
55 | ProxyError, | |
56 | RequestError, | |
57 | SSLError, | |
58 | TransportError, | |
59 | ) | |
60 | from ..socks import ProxyError as SocksProxyError | |
61 | ||
# Content-Encoding values we can decode; advertised via Accept-Encoding in _send()
SUPPORTED_ENCODINGS = [
    'gzip', 'deflate',
]

# Brotli support is optional; only advertise it when the dependency is present
if brotli is not None:
    SUPPORTED_ENCODINGS.append('br')
68 | ||
add96eb9 | 69 | ''' |
8a8b5452 | 70 | Override urllib3's behavior to not convert lower-case percent-encoded characters |
71 | to upper-case during url normalization process. | |
72 | ||
RFC3986 defines that the lower or upper case percent-encoded hexadecimal characters are equivalent
and normalizers should convert them to uppercase for consistency [1].
75 | ||
76 | However, some sites may have an incorrect implementation where they provide | |
77 | a percent-encoded url that is then compared case-sensitively.[2] | |
78 | ||
While this is a very rare case, since urllib does not do this normalization step, it
is best to avoid it in requests too for compatibility reasons.
81 | ||
82 | 1: https://tools.ietf.org/html/rfc3986#section-2.1 | |
83 | 2: https://github.com/streamlink/streamlink/pull/4003 | |
add96eb9 | 84 | ''' |
8a8b5452 | 85 | |
86 | ||
class Urllib3PercentREOverride:
    """Wrap a compiled regex so that ``subn`` reports the real match count
    but never rewrites the string.

    urllib3 uses ``PERCENT_RE.subn`` to upper-case percent-encoded sequences
    during URL normalization; substituting this wrapper disables the rewrite
    while keeping the match-count behaviour urllib3 relies on.
    """

    def __init__(self, r: re.Pattern):
        self.re = r

    def __getattr__(self, name):
        # Everything except subn() is delegated to the wrapped pattern
        return getattr(self.re, name)

    def subn(self, repl, string, *args, **kwargs):
        # Count matches as the real pattern would, but return the input unmodified
        count = self.re.subn(repl, string, *args, **kwargs)[1]
        return string, count
97 | ||
98 | ||
# urllib3 >= 1.25.8 normalizes percent-encodings via PERCENT_RE.subn:
# https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0
import urllib3.util.url

# Replace the module-level pattern with our no-op wrapper; the private name
# changed in urllib3 2.0.0, so try both spellings.
if hasattr(urllib3.util.url, 'PERCENT_RE'):
    urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE)
elif hasattr(urllib3.util.url, '_PERCENT_RE'):  # urllib3 >= 2.0.0
    urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
else:
    # Neither attribute exists: a future urllib3 changed internals again
    warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())
109 | ||
add96eb9 | 110 | ''' |
Workaround for issue in urllib3.util.ssl_.py: ssl_wrap_socket does not pass
server_hostname to SSLContext.wrap_socket if server_hostname is an IP,
however this is an issue because we set check_hostname to True in our SSLContext.

Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_socket to pass server_hostname regardless.
116 | ||
117 | This has been fixed in urllib3 2.0+. | |
118 | See: https://github.com/urllib3/urllib3/issues/517 | |
add96eb9 | 119 | ''' |
8a8b5452 | 120 | |
if urllib3_version < (2, 0, 0):
    with contextlib.suppress(Exception):
        # Pretend to be on SecureTransport so urllib3 < 2.0 always forwards
        # server_hostname to SSLContext.wrap_socket (see docstring above).
        urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True


# Requests will not automatically handle no_proxy by default
# due to buggy no_proxy handling with proxy dict [1].
# 1. https://github.com/psf/requests/issues/5000
requests.adapters.select_proxy = select_proxy
130 | ||
131 | ||
class RequestsResponseAdapter(Response):
    """Adapt a ``requests`` response to our ``Response`` interface,
    translating urllib3 read errors into our networking exceptions."""

    def __init__(self, res: requests.models.Response):
        super().__init__(
            fp=res.raw, headers=res.headers, url=res.url,
            status=res.status_code, reason=res.reason)

        self._requests_response = res

    def read(self, amt: int | None = None):
        try:
            # Read through the underlying urllib3 response so that
            # Content-Encoding decoding is applied.
            return self.fp.read(amt, decode_content=True)

        # See urllib3.response.HTTPResponse.read() for exceptions raised on read
        except urllib3.exceptions.SSLError as e:
            raise SSLError(cause=e) from e

        except urllib3.exceptions.ProtocolError as e:
            # An IncompleteRead, if any, is wrapped inside the ProtocolError;
            # see urllib3.response.HTTPResponse._error_catcher()
            for candidate in (e.__context__, e.__cause__, *variadic(e.args)):
                if not isinstance(candidate, http.client.IncompleteRead):
                    continue
                # `urllib3.exceptions.IncompleteRead` subclasses
                # `http.client.IncompleteRead` but stores an `int` in its
                # `partial` attribute instead of the bytes read so far.
                if isinstance(candidate.partial, int):
                    partial = candidate.partial
                else:
                    partial = len(candidate.partial)
                raise IncompleteRead(partial=partial, expected=candidate.expected) from e
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # catch-all for any other urllib3 response exceptions
            raise TransportError(cause=e) from e
165 | ||
166 | ||
class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
    # HTTPAdapter that wires our SSLContext, source address and (optional)
    # proxy SSL context into urllib3's pool managers.
    def __init__(self, ssl_context=None, proxy_ssl_context=None, source_address=None, **kwargs):
        # kwargs forwarded to every PoolManager/ProxyManager we create
        self._pm_args = {}
        if ssl_context:
            self._pm_args['ssl_context'] = ssl_context
        if source_address:
            # (host, 0): bind to the given address, let the OS pick the port
            self._pm_args['source_address'] = (source_address, 0)
        # HTTPS proxies reuse the regular SSL context unless one is given
        self._proxy_ssl_context = proxy_ssl_context or ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, *args, **kwargs):
        return super().init_poolmanager(*args, **kwargs, **self._pm_args)

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        extra_kwargs = {}
        # proxy_ssl_context only applies to HTTP(S) proxies, not SOCKS
        if not proxy.lower().startswith('socks') and self._proxy_ssl_context:
            extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
        return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)

    # Skip `requests` internal verification; we use our own SSLContext
    def cert_verify(*args, **kwargs):
        pass

    # requests 2.32.2+: Reimplementation without `_urllib3_request_context`
    # (verify/cert are intentionally ignored; TLS is configured by our context)
    def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None):
        url = urllib3.util.parse_url(request.url).url

        manager = self.poolmanager
        if proxy := select_proxy(url, proxies):
            manager = self.proxy_manager_for(proxy)

        return manager.connection_from_url(url)
199 | ||
8a8b5452 | 200 | |
class RequestsSession(requests.sessions.Session):
    """
    Ensure unified redirect method handling with our urllib redirect handler.
    """

    def rebuild_method(self, prepared_request, response):
        # Compute the redirect method using our shared redirect rules
        new_method = get_redirect_method(prepared_request.method, response.status_code)

        # HACK: requests removes headers/body on redirect unless code was a 307/308.
        if new_method == prepared_request.method:
            # Stash the real status and fake a 308 so requests keeps the
            # headers/body; restored in rebuild_auth() below.
            response._real_status_code = response.status_code
            response.status_code = 308

        prepared_request.method = new_method

        # Requests fails to resolve dot segments on absolute redirect locations
        # See: https://github.com/yt-dlp/yt-dlp/issues/9020
        prepared_request.url = normalize_url(prepared_request.url)

    def rebuild_auth(self, prepared_request, response):
        # HACK: undo status code change from rebuild_method, if applicable.
        # rebuild_auth runs after requests would remove headers/body based on status code
        if hasattr(response, '_real_status_code'):
            response.status_code = response._real_status_code
            del response._real_status_code
        return super().rebuild_auth(prepared_request, response)
227 | ||
228 | ||
class Urllib3LoggingFilter(logging.Filter):
    """Drop urllib3's per-request log line; HTTPConnection already prints it."""

    def filter(self, record):
        # This exact format string is urllib3's "request made" log message
        return record.msg != '%s://%s:%s "%s %s %s" %s %s'
236 | ||
237 | ||
class Urllib3LoggingHandler(logging.Handler):
    """Redirect urllib3 logs to our logger"""

    def __init__(self, logger, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._logger = logger

    def emit(self, record):
        try:
            message = self.format(record)
            # ERROR and above goes to the error stream; everything else to stdout
            sink = self._logger.error if record.levelno >= logging.ERROR else self._logger.stdout
            sink(message)
        except Exception:
            self.handleError(record)
255 | ||
256 | ||
@register_rh
class RequestsRH(RequestHandler, InstanceStoreMixin):

    """Requests RequestHandler
    https://github.com/psf/requests
    """
    _SUPPORTED_URL_SCHEMES = ('http', 'https')
    _SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
    _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
    _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
    RH_NAME = 'requests'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Forward urllib3 debug messages to our logger
        logger = logging.getLogger('urllib3')
        self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
        self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
        self.__logging_handler.addFilter(Urllib3LoggingFilter())
        logger.addHandler(self.__logging_handler)
        # TODO: Use a logger filter to suppress pool reuse warning instead
        logger.setLevel(logging.ERROR)

        if self.verbose:
            # Setting this globally is not ideal, but is easier than hacking with urllib3.
            # It could technically be problematic for scripts embedding yt-dlp.
            # However, it is unlikely debug traffic is used in that context in a way this will cause problems.
            urllib3.connection.HTTPConnection.debuglevel = 1
            logger.setLevel(logging.DEBUG)
        # this is expected if we are using --no-check-certificate
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    def close(self):
        self._clear_instances()
        # Remove the logging handler that contains a reference to our logger
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
        logging.getLogger('urllib3').removeHandler(self.__logging_handler)

    def _check_extensions(self, extensions):
        # cookiejar and timeout are supported natively; pop so the base
        # class does not reject them as unknown extensions
        super()._check_extensions(extensions)
        extensions.pop('cookiejar', None)
        extensions.pop('timeout', None)

    def _create_instance(self, cookiejar):
        # Build a fresh session wired to our SSL context, source address,
        # cookie jar and redirect handling; retries are handled by the caller.
        session = RequestsSession()
        http_adapter = RequestsHTTPAdapter(
            ssl_context=self._make_sslcontext(),
            source_address=self.source_address,
            max_retries=urllib3.util.retry.Retry(False),
        )
        session.adapters.clear()
        session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})
        session.mount('https://', http_adapter)
        session.mount('http://', http_adapter)
        session.cookies = cookiejar
        session.trust_env = False  # no need, we already load proxies from env
        return session

    def _send(self, request):

        headers = self._merge_headers(request.headers)
        add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)

        max_redirects_exceeded = False

        # Sessions are cached per cookiejar via InstanceStoreMixin
        session = self._get_instance(cookiejar=self._get_cookiejar(request))

        try:
            requests_res = session.request(
                method=request.method,
                url=request.url,
                data=request.data,
                headers=headers,
                timeout=self._calculate_timeout(request),
                proxies=self._get_proxies(request),
                allow_redirects=True,
                stream=True,
            )

        except requests.exceptions.TooManyRedirects as e:
            # Surface the final response as an HTTPError with redirect_loop set
            max_redirects_exceeded = True
            requests_res = e.response

        except requests.exceptions.SSLError as e:
            if 'CERTIFICATE_VERIFY_FAILED' in str(e):
                raise CertificateVerifyError(cause=e) from e
            raise SSLError(cause=e) from e

        except requests.exceptions.ProxyError as e:
            raise ProxyError(cause=e) from e

        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # Catch any urllib3 exceptions that may leak through
            raise TransportError(cause=e) from e

        except requests.exceptions.RequestException as e:
            # Miscellaneous Requests exceptions. May not necessarily be network related, e.g. InvalidURL
            raise RequestError(cause=e) from e

        res = RequestsResponseAdapter(requests_res)

        if not 200 <= res.status < 300:
            raise HTTPError(res, redirect_loop=max_redirects_exceeded)

        return res
366 | ||
367 | ||
@register_preference(RequestsRH)
def requests_preference(rh, request):
    # Prefer this handler over lower-priority handlers for all requests
    return 100
371 | ||
372 | ||
# Use our socks proxy implementation with requests to avoid an extra dependency.
class SocksHTTPConnection(urllib3.connection.HTTPConnection):
    def __init__(self, _socks_options, *args, **kwargs):  # must use _socks_options to pass PoolKey checks
        self._proxy_args = _socks_options
        super().__init__(*args, **kwargs)

    def _new_conn(self):
        # Open the TCP connection through the configured SOCKS proxy,
        # mapping our errors onto the urllib3 exceptions callers expect.
        try:
            return create_connection(
                address=(self._proxy_args['addr'], self._proxy_args['port']),
                timeout=self.timeout,
                source_address=self.source_address,
                _create_socket_func=functools.partial(
                    create_socks_proxy_socket, (self.host, self.port), self._proxy_args))
        except (socket.timeout, TimeoutError) as e:
            raise urllib3.exceptions.ConnectTimeoutError(
                self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
        except SocksProxyError as e:
            raise urllib3.exceptions.ProxyError(str(e), e) from e
        except OSError as e:
            raise urllib3.exceptions.NewConnectionError(
                self, f'Failed to establish a new connection: {e}') from e
395 | ||
396 | ||
class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):
    # SOCKS tunnelling from SocksHTTPConnection, TLS from HTTPSConnection
    pass
399 | ||
400 | ||
class SocksHTTPConnectionPool(urllib3.HTTPConnectionPool):
    # Pool whose connections go through our SOCKS HTTP connection class
    ConnectionCls = SocksHTTPConnection
403 | ||
404 | ||
class SocksHTTPSConnectionPool(urllib3.HTTPSConnectionPool):
    # Pool whose connections go through our SOCKS HTTPS connection class
    ConnectionCls = SocksHTTPSConnection
407 | ||
408 | ||
class SocksProxyManager(urllib3.PoolManager):
    # PoolManager that routes all pools through our SOCKS connection classes.

    def __init__(self, socks_proxy, username=None, password=None, num_pools=10, headers=None, **connection_pool_kw):
        # Parsed SOCKS options travel via pool kwargs so they become part of the PoolKey
        connection_pool_kw['_socks_options'] = make_socks_proxy_opts(socks_proxy)
        super().__init__(num_pools, headers, **connection_pool_kw)
        self.pool_classes_by_scheme = {
            'http': SocksHTTPConnectionPool,
            'https': SocksHTTPSConnectionPool,
        }
418 | ||
419 | ||
# Make requests use our SOCKS implementation instead of requiring PySocks
requests.adapters.SOCKSProxyManager = SocksProxyManager