import contextlib
import functools
import http.client
import logging
import re
import socket
import warnings

from ..dependencies import brotli, requests, urllib3
from ..utils import bug_reports_message, int_or_none, variadic
from ..utils.networking import normalize_url

if requests is None:
    raise ImportError('requests module is not installed')

if urllib3 is None:
    raise ImportError('urllib3 module is not installed')

urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.'))

if urllib3_version < (1, 26, 17):
    raise ImportError('Only urllib3 >= 1.26.17 is supported')

if requests.__build__ < 0x023100:
    raise ImportError('Only requests >= 2.31.0 is supported')
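
# Note: requests packs its version into `__build__` as hex digit pairs,
# e.g. 2.31.0 -> 0x023100, which is what makes the integer comparison above valid.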

import requests.adapters
import urllib3.connection
import urllib3.exceptions

from ._helper import (
    InstanceStoreMixin,
    add_accept_encoding_header,
    create_connection,
    create_socks_proxy_socket,
    get_redirect_method,
    make_socks_proxy_opts,
    select_proxy,
)
from .common import (
    Features,
    RequestHandler,
    Response,
    register_preference,
    register_rh,
)
from .exceptions import (
    CertificateVerifyError,
    HTTPError,
    IncompleteRead,
    ProxyError,
    RequestError,
    SSLError,
    TransportError,
)
from ..socks import ProxyError as SocksProxyError

SUPPORTED_ENCODINGS = [
    'gzip', 'deflate',
]

if brotli is not None:
    SUPPORTED_ENCODINGS.append('br')

'''
Override urllib3's behavior to not convert lower-case percent-encoded characters
to upper-case during the url normalization process.

RFC 3986 defines that lower- or upper-case percent-encoded hexadecimal characters are
equivalent and normalizers should convert them to uppercase for consistency [1].

However, some sites may have an incorrect implementation where they provide
a percent-encoded url that is then compared case-sensitively. [2]

While this is a very rare case, since urllib does not do this normalization step, it
is best to avoid it in requests too, for compatibility reasons.

1: https://tools.ietf.org/html/rfc3986#section-2.1
2: https://github.com/streamlink/streamlink/pull/4003
'''


class Urllib3PercentREOverride:
    def __init__(self, r: re.Pattern):
        self.re = r

    # pass through all other attribute calls to the original re
    def __getattr__(self, item):
        return self.re.__getattribute__(item)

    def subn(self, repl, string, *args, **kwargs):
        return string, self.re.subn(repl, string, *args, **kwargs)[1]
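
# How the override works: urllib3's url normalizer calls PERCENT_RE.subn() and
# uses the substitution *count* to decide which '%' characters are already valid
# escapes; returning the original string unchanged, together with the real match
# count, keeps the case of existing percent-encodings (e.g. '%2f' stays '%2f').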

# urllib3 >= 1.25.8 uses subn:
# https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0
import urllib3.util.url  # noqa: E305

if hasattr(urllib3.util.url, 'PERCENT_RE'):
    urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE)
elif hasattr(urllib3.util.url, '_PERCENT_RE'):  # urllib3 >= 2.0.0
    urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
else:
    warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())

'''
Workaround for an issue in urllib3.util.ssl_: ssl_wrap_socket does not pass
server_hostname to SSLContext.wrap_socket if server_hostname is an IP,
however this is an issue because we set check_hostname to True in our SSLContext.

Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_socket to pass server_hostname regardless.

This has been fixed in urllib3 2.0+.
See: https://github.com/urllib3/urllib3/issues/517
'''

if urllib3_version < (2, 0, 0):
    with contextlib.suppress(Exception):
        urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True
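
# In urllib3 < 2, ssl_wrap_socket only sends SNI/server_hostname when the host
# is not an IP address *or* IS_SECURETRANSPORT is set; forcing the flag to True
# makes it pass server_hostname unconditionally, which our SSLContext requires.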


# Requests will not automatically handle no_proxy by default
# due to buggy no_proxy handling with proxy dict [1].
# 1. https://github.com/psf/requests/issues/5000
requests.adapters.select_proxy = select_proxy
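
# A sketch of the intended behaviour (illustrative only, using the proxies
# mapping keys yt-dlp passes around):
#   select_proxy('http://localhost/', {'all': 'http://proxy:3128', 'no': 'localhost'})
#   -> None (proxy bypassed), where requests' own resolver mishandles this case.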


class RequestsResponseAdapter(Response):
    def __init__(self, res: requests.models.Response):
        super().__init__(
            fp=res.raw, headers=res.headers, url=res.url,
            status=res.status_code, reason=res.reason)

        self._requests_response = res

    def read(self, amt: int = None):
        try:
            # Interact with the urllib3 response directly.
            return self.fp.read(amt, decode_content=True)

        # See urllib3.response.HTTPResponse.read() for exceptions raised on read
        except urllib3.exceptions.SSLError as e:
            raise SSLError(cause=e) from e

        except urllib3.exceptions.ProtocolError as e:
            # IncompleteRead is always contained within ProtocolError
            # See urllib3.response.HTTPResponse._error_catcher()
            ir_err = next(
                (err for err in (e.__context__, e.__cause__, *variadic(e.args))
                 if isinstance(err, http.client.IncompleteRead)), None)
            if ir_err is not None:
                # `urllib3.exceptions.IncompleteRead` is a subclass of `http.client.IncompleteRead`,
                # but uses an `int` for its `partial` property.
                partial = ir_err.partial if isinstance(ir_err.partial, int) else len(ir_err.partial)
                raise IncompleteRead(partial=partial, expected=ir_err.expected) from e
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # catch-all for any other urllib3 response exceptions
            raise TransportError(cause=e) from e
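
    # Illustrative mapping (hypothetical traffic): a TLS failure mid-stream
    # surfaces as our SSLError, while a connection dropped before the body is
    # complete surfaces as IncompleteRead(partial=<bytes read>,
    # expected=<bytes still expected>).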


class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
    def __init__(self, ssl_context=None, proxy_ssl_context=None, source_address=None, **kwargs):
        self._pm_args = {}
        if ssl_context:
            self._pm_args['ssl_context'] = ssl_context
        if source_address:
            self._pm_args['source_address'] = (source_address, 0)
        self._proxy_ssl_context = proxy_ssl_context or ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, *args, **kwargs):
        return super().init_poolmanager(*args, **kwargs, **self._pm_args)

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        extra_kwargs = {}
        if not proxy.lower().startswith('socks') and self._proxy_ssl_context:
            extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
        return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)

    # Skip `requests` internal verification; we use our own SSLContext
    def cert_verify(*args, **kwargs):
        pass

    # requests 2.31.0-2.32.1
    def _get_connection(self, request, *_, proxies=None, **__):
        return self.get_connection(request.url, proxies)

    # requests 2.32.2+: Reimplementation without `_urllib3_request_context`
    def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None):
        url = urllib3.util.parse_url(request.url).url

        manager = self.poolmanager
        if proxy := select_proxy(url, proxies):
            manager = self.proxy_manager_for(proxy)

        return manager.connection_from_url(url)
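
    # Note: `verify` and `cert` are accepted only to satisfy the requests 2.32.2+
    # signature; actual TLS configuration comes from the SSLContext supplied to
    # the pool manager via `_pm_args`, and `cert_verify` above is a no-op.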


class RequestsSession(requests.sessions.Session):
    """
    Ensure unified redirect method handling with our urllib redirect handler.
    """

    def rebuild_method(self, prepared_request, response):
        new_method = get_redirect_method(prepared_request.method, response.status_code)

        # HACK: requests removes headers/body on redirect unless code was a 307/308.
        if new_method == prepared_request.method:
            response._real_status_code = response.status_code
            response.status_code = 308

        prepared_request.method = new_method

        # Requests fails to resolve dot segments on absolute redirect locations
        # See: https://github.com/yt-dlp/yt-dlp/issues/9020
        prepared_request.url = normalize_url(prepared_request.url)
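
    # Example: if get_redirect_method keeps the method for a non-307/308 response
    # (e.g. HEAD on a 301), requests would still strip headers/body because of the
    # status code; masquerading it as 308 (undone in rebuild_auth below) avoids that.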

    def rebuild_auth(self, prepared_request, response):
        # HACK: undo status code change from rebuild_method, if applicable.
        # rebuild_auth runs after requests would remove headers/body based on status code
        if hasattr(response, '_real_status_code'):
            response.status_code = response._real_status_code
            del response._real_status_code
        return super().rebuild_auth(prepared_request, response)


class Urllib3LoggingFilter(logging.Filter):

    def filter(self, record):
        # Ignore HTTP request messages since HTTPConnection prints those
        if record.msg == '%s://%s:%s "%s %s %s" %s %s':
            return False
        return True
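
# The template above is the unformatted log format urllib3's connection pool
# uses for request lines; matching on record.msg (rather than the rendered
# message) drops exactly those records while keeping other urllib3 log lines.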


class Urllib3LoggingHandler(logging.Handler):
    """Redirect urllib3 logs to our logger"""

    def __init__(self, logger, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._logger = logger

    def emit(self, record):
        try:
            msg = self.format(record)
            if record.levelno >= logging.ERROR:
                self._logger.error(msg)
            else:
                self._logger.stdout(msg)

        except Exception:
            self.handleError(record)
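
# Note: the wrapped logger is yt-dlp's own, not a stdlib logging.Logger; it
# exposes stdout() and error() rather than the usual logging methods.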


@register_rh
class RequestsRH(RequestHandler, InstanceStoreMixin):

    """Requests RequestHandler
    https://github.com/psf/requests
    """
    _SUPPORTED_URL_SCHEMES = ('http', 'https')
    _SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
    _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
    _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Forward urllib3 debug messages to our logger
        logger = logging.getLogger('urllib3')
        self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
        self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
        self.__logging_handler.addFilter(Urllib3LoggingFilter())
        logger.addHandler(self.__logging_handler)
        # TODO: Use a logger filter to suppress pool reuse warning instead
        logger.setLevel(logging.ERROR)

        if self.verbose:
            # Setting this globally is not ideal, but is easier than hacking with urllib3.
            # It could technically be problematic for scripts embedding yt-dlp.
            # However, it is unlikely debug traffic is used in that context in a way this will cause problems.
            urllib3.connection.HTTPConnection.debuglevel = 1
            logger.setLevel(logging.DEBUG)
        # this is expected if we are using --no-check-certificate
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    def close(self):
        self._clear_instances()
        # Remove the logging handler that contains a reference to our logger
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
        logging.getLogger('urllib3').removeHandler(self.__logging_handler)

    def _check_extensions(self, extensions):
        super()._check_extensions(extensions)
        extensions.pop('cookiejar', None)
        extensions.pop('timeout', None)

    def _create_instance(self, cookiejar):
        session = RequestsSession()
        http_adapter = RequestsHTTPAdapter(
            ssl_context=self._make_sslcontext(),
            source_address=self.source_address,
            max_retries=urllib3.util.retry.Retry(False),
        )
        session.adapters.clear()
        session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})
        session.mount('https://', http_adapter)
        session.mount('http://', http_adapter)
        session.cookies = cookiejar
        session.trust_env = False  # no need, we already load proxies from env
        return session
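
    # Note: Retry(False) above disables urllib3's automatic retries so errors
    # propagate immediately; retrying is handled at a higher level by yt-dlp.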

    def _send(self, request):

        headers = self._merge_headers(request.headers)
        add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)

        max_redirects_exceeded = False

        session = self._get_instance(cookiejar=self._get_cookiejar(request))

        try:
            requests_res = session.request(
                method=request.method,
                url=request.url,
                data=request.data,
                headers=headers,
                timeout=self._calculate_timeout(request),
                proxies=self._get_proxies(request),
                allow_redirects=True,
                stream=True,  # we read from the raw urllib3 response ourselves
            )

        except requests.exceptions.TooManyRedirects as e:
            # Don't raise yet: the final response is surfaced below as an
            # HTTPError with redirect_loop=True
            max_redirects_exceeded = True
            requests_res = e.response

        except requests.exceptions.SSLError as e:
            if 'CERTIFICATE_VERIFY_FAILED' in str(e):
                raise CertificateVerifyError(cause=e) from e
            raise SSLError(cause=e) from e

        except requests.exceptions.ProxyError as e:
            raise ProxyError(cause=e) from e

        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # Catch any urllib3 exceptions that may leak through
            raise TransportError(cause=e) from e

        except requests.exceptions.RequestException as e:
            # Miscellaneous Requests exceptions. May not necessarily be network related, e.g. InvalidURL
            raise RequestError(cause=e) from e

        res = RequestsResponseAdapter(requests_res)

        if not 200 <= res.status < 300:
            raise HTTPError(res, redirect_loop=max_redirects_exceeded)

        return res


@register_preference(RequestsRH)
def requests_preference(rh, request):
    return 100
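
# Higher preference wins: returning 100 ranks this handler above
# lower-preference handlers (such as the stdlib urllib one) whenever
# both support a given request.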


# Use our socks proxy implementation with requests to avoid an extra dependency.
class SocksHTTPConnection(urllib3.connection.HTTPConnection):
    def __init__(self, _socks_options, *args, **kwargs):  # must use _socks_options to pass PoolKey checks
        self._proxy_args = _socks_options
        super().__init__(*args, **kwargs)

    def _new_conn(self):
        try:
            return create_connection(
                address=(self._proxy_args['addr'], self._proxy_args['port']),
                timeout=self.timeout,
                source_address=self.source_address,
                _create_socket_func=functools.partial(
                    create_socks_proxy_socket, (self.host, self.port), self._proxy_args))
        except (socket.timeout, TimeoutError) as e:
            raise urllib3.exceptions.ConnectTimeoutError(
                self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
        except SocksProxyError as e:
            raise urllib3.exceptions.ProxyError(str(e), e) from e
        except OSError as e:
            raise urllib3.exceptions.NewConnectionError(
                self, f'Failed to establish a new connection: {e}') from e


class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):
    pass


class SocksHTTPConnectionPool(urllib3.HTTPConnectionPool):
    ConnectionCls = SocksHTTPConnection


class SocksHTTPSConnectionPool(urllib3.HTTPSConnectionPool):
    ConnectionCls = SocksHTTPSConnection


class SocksProxyManager(urllib3.PoolManager):

    def __init__(self, socks_proxy, username=None, password=None, num_pools=10, headers=None, **connection_pool_kw):
        connection_pool_kw['_socks_options'] = make_socks_proxy_opts(socks_proxy)
        super().__init__(num_pools, headers, **connection_pool_kw)
        self.pool_classes_by_scheme = {
            'http': SocksHTTPConnectionPool,
            'https': SocksHTTPSConnectionPool,
        }


requests.adapters.SOCKSProxyManager = SocksProxyManager
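
# requests' HTTPAdapter.proxy_manager_for() instantiates SOCKSProxyManager for
# any proxy URL starting with 'socks', so patching the symbol here routes e.g.
# proxies={'all': 'socks5://127.0.0.1:1080'} through the implementation above.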