9 from ..dependencies
import brotli
, requests
, urllib3
10 from ..utils
import bug_reports_message
, int_or_none
, variadic
11 from ..utils
.networking
import normalize_url
# Fail fast when the optional third-party dependencies are unavailable or
# too old — the rest of this module is unusable without them.
# NOTE(review): the `is None` guard headers were dropped in this copy of the
# file and have been restored — confirm against upstream.
if requests is None:
    raise ImportError('requests module is not installed')

if urllib3 is None:
    raise ImportError('urllib3 module is not installed')

# Parse "X.Y.Z" into a comparable tuple; non-numeric components become 0.
urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.'))

if urllib3_version < (1, 26, 17):
    raise ImportError('Only urllib3 >= 1.26.17 is supported')

# requests.__build__ is a hex-encoded version number (0xMMmmpp).
if requests.__build__ < 0x023202:
    raise ImportError('Only requests >= 2.32.2 is supported')
27 import requests
.adapters
29 import urllib3
.connection
30 import urllib3
.exceptions
33 from ._helper
import (
35 add_accept_encoding_header
,
37 create_socks_proxy_socket
,
39 make_socks_proxy_opts
,
49 from .exceptions
import (
50 CertificateVerifyError
,
58 from ..socks
import ProxyError
as SocksProxyError
# Content encodings we can decode transparently (advertised via Accept-Encoding).
# NOTE(review): the list contents were dropped in this copy; 'gzip'/'deflate'
# restored — confirm against upstream.
SUPPORTED_ENCODINGS = [
    'gzip', 'deflate',
]

# brotli is an optional dependency; only advertise 'br' when it is importable.
if brotli is not None:
    SUPPORTED_ENCODINGS.append('br')
68 Override urllib3's behavior to not convert lower-case percent-encoded characters
69 to upper-case during url normalization process.
RFC3986 defines that the lower or upper case percent-encoded hexadecimal characters are equivalent
72 and normalizers should convert them to uppercase for consistency [1].
74 However, some sites may have an incorrect implementation where they provide
75 a percent-encoded url that is then compared case-sensitively.[2]
While this is a very rare case, since urllib does not do this normalization step, it
is best to avoid it in requests too for compatibility reasons.
80 1: https://tools.ietf.org/html/rfc3986#section-2.1
81 2: https://github.com/streamlink/streamlink/pull/4003
class Urllib3PercentREOverride:
    """Wrap a compiled percent-encoding regex so urllib3's URL normalization
    leaves the case of percent-encoded escapes untouched (see module comment
    above for the rationale).
    """

    def __init__(self, r: re.Pattern):
        # NOTE(review): this assignment was dropped in this copy of the file;
        # it is required since both methods below read self.re.
        self.re = r

    # pass through all other attribute calls to the original re
    def __getattr__(self, item):
        return self.re.__getattribute__(item)

    def subn(self, repl, string, *args, **kwargs):
        # Report the real substitution count but hand back the input unchanged,
        # so urllib3 believes normalization ran without the case being altered.
        return string, self.re.subn(repl, string, *args, **kwargs)[1]
# urllib3 >= 1.25.8 uses subn:
# https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0
import urllib3.util.url  # noqa: E305

if hasattr(urllib3.util.url, 'PERCENT_RE'):
    urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE)
elif hasattr(urllib3.util.url, '_PERCENT_RE'):  # urllib3 >= 2.0.0
    urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
else:
    # NOTE(review): this `else:` header was dropped in this copy of the file and
    # has been restored — without it the warning would run unconditionally.
    warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())
Workaround for issue in urllib3.util.ssl_.py: ssl_wrap_socket does not pass
110 server_hostname to SSLContext.wrap_socket if server_hostname is an IP,
111 however this is an issue because we set check_hostname to True in our SSLContext.
113 Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_context to pass server_hostname regardless.
115 This has been fixed in urllib3 2.0+.
116 See: https://github.com/urllib3/urllib3/issues/517
# Only patch urllib3 < 2.0 — the underlying bug was fixed in 2.0+ (see note above).
if urllib3_version < (2, 0, 0):
    with contextlib.suppress(Exception):
        # Best-effort: the attribute may not exist on all urllib3 builds.
        urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True
# Requests will not automatically handle no_proxy by default
# due to buggy no_proxy handling with proxy dict [1].
# 1. https://github.com/psf/requests/issues/5000
# Replace requests' proxy selection with our own implementation.
requests.adapters.select_proxy = select_proxy
class RequestsResponseAdapter(Response):
    """Adapt a `requests` response object to our `Response` interface,
    translating urllib3 read errors into our exception types."""

    def __init__(self, res: requests.models.Response):
        # NOTE(review): the `super().__init__(` line was dropped in this copy
        # and has been restored — confirm against upstream.
        super().__init__(
            fp=res.raw, headers=res.headers, url=res.url,
            status=res.status_code, reason=res.reason)

        # Keep a reference to the original response for debugging/inspection.
        self._requests_response = res

    def read(self, amt: int = None):
        # NOTE(review): the `try:` header was dropped in this copy; restored.
        try:
            # Interact with urllib3 response directly.
            return self.fp.read(amt, decode_content=True)

        # See urllib3.response.HTTPResponse.read() for exceptions raised on read
        except urllib3.exceptions.SSLError as e:
            raise SSLError(cause=e) from e

        except urllib3.exceptions.ProtocolError as e:
            # IncompleteRead is always contained within ProtocolError
            # See urllib3.response.HTTPResponse._error_catcher()
            # NOTE(review): the `ir_err = next(` line was dropped in this copy; restored.
            ir_err = next(
                (err for err in (e.__context__, e.__cause__, *variadic(e.args))
                 if isinstance(err, http.client.IncompleteRead)), None)
            if ir_err is not None:
                # `urllib3.exceptions.IncompleteRead` is subclass of `http.client.IncompleteRead`
                # but uses an `int` for its `partial` property.
                partial = ir_err.partial if isinstance(ir_err.partial, int) else len(ir_err.partial)
                raise IncompleteRead(partial=partial, expected=ir_err.expected) from e
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # catch-all for any other urllib3 response exceptions
            raise TransportError(cause=e) from e
class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
    """HTTPAdapter that plumbs our SSL context and source address into the
    urllib3 pool managers that requests creates."""

    def __init__(self, ssl_context=None, proxy_ssl_context=None, source_address=None, **kwargs):
        # NOTE(review): the `_pm_args = {}` initialization and both `if`
        # guards were dropped in this copy and have been restored.
        self._pm_args = {}
        if ssl_context:
            self._pm_args['ssl_context'] = ssl_context
        if source_address:
            self._pm_args['source_address'] = (source_address, 0)
        self._proxy_ssl_context = proxy_ssl_context or ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, *args, **kwargs):
        return super().init_poolmanager(*args, **kwargs, **self._pm_args)

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        extra_kwargs = {}
        # SOCKS proxy managers do not accept proxy_ssl_context.
        if not proxy.lower().startswith('socks') and self._proxy_ssl_context:
            extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
        return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)

    # Skip `requests` internal verification; we use our own SSLContext
    def cert_verify(*args, **kwargs):
        pass

    # requests 2.32.2+: Reimplementation without `_urllib3_request_context`
    def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None):
        url = urllib3.util.parse_url(request.url).url
        manager = self.poolmanager
        if proxy := select_proxy(url, proxies):
            manager = self.proxy_manager_for(proxy)
        return manager.connection_from_url(url)
class RequestsSession(requests.sessions.Session):
    """
    Ensure unified redirect method handling with our urllib redirect handler.
    """

    def rebuild_method(self, prepared_request, response):
        # Decide the redirect method with our shared redirect policy.
        new_method = get_redirect_method(prepared_request.method, response.status_code)

        # HACK: requests removes headers/body on redirect unless code was a 307/308.
        if new_method == prepared_request.method:
            # Pretend the status was 308 so requests keeps headers/body; the
            # real code is stashed and restored in rebuild_auth below.
            response._real_status_code = response.status_code
            response.status_code = 308

        prepared_request.method = new_method

        # Requests fails to resolve dot segments on absolute redirect locations
        # See: https://github.com/yt-dlp/yt-dlp/issues/9020
        prepared_request.url = normalize_url(prepared_request.url)

    def rebuild_auth(self, prepared_request, response):
        # HACK: undo status code change from rebuild_method, if applicable.
        # rebuild_auth runs after requests would remove headers/body based on status code
        if hasattr(response, '_real_status_code'):
            response.status_code = response._real_status_code
            del response._real_status_code
        return super().rebuild_auth(prepared_request, response)
class Urllib3LoggingFilter(logging.Filter):
    """Drop urllib3's per-request log lines (HTTPConnection already prints them)."""

    def filter(self, record):
        # Ignore HTTP request messages since HTTPConnection prints those
        # NOTE(review): the return statements were dropped in this copy of the
        # file and have been restored.
        if record.msg == '%s://%s:%s "%s %s %s" %s %s':
            return False
        return True
class Urllib3LoggingHandler(logging.Handler):
    """Redirect urllib3 logs to our logger"""

    def __init__(self, logger, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._logger = logger

    def emit(self, record):
        # NOTE(review): the try/else/except scaffolding was dropped in this
        # copy of the file and has been restored.
        try:
            msg = self.format(record)
            # ERROR and above go to the error channel; everything else to stdout.
            if record.levelno >= logging.ERROR:
                self._logger.error(msg)
            else:
                self._logger.stdout(msg)
        except Exception:
            # logging.Handler convention: never let a logging failure propagate.
            self.handleError(record)
class RequestsRH(RequestHandler, InstanceStoreMixin):

    """Requests RequestHandler
    https://github.com/psf/requests
    """
    _SUPPORTED_URL_SCHEMES = ('http', 'https')
    _SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
    _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
    _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
    # NOTE(review): upstream likely also sets RH_NAME here; not visible in this
    # copy of the file — confirm before relying on handler naming.

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Forward urllib3 debug messages to our logger
        logger = logging.getLogger('urllib3')
        self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
        self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
        self.__logging_handler.addFilter(Urllib3LoggingFilter())
        logger.addHandler(self.__logging_handler)
        # TODO: Use a logger filter to suppress pool reuse warning instead
        logger.setLevel(logging.ERROR)

        # NOTE(review): the `if self.verbose:` guard was dropped in this copy
        # and has been restored — confirm against upstream.
        if self.verbose:
            # Setting this globally is not ideal, but is easier than hacking with urllib3.
            # It could technically be problematic for scripts embedding yt-dlp.
            # However, it is unlikely debug traffic is used in that context in a way this will cause problems.
            urllib3.connection.HTTPConnection.debuglevel = 1
            logger.setLevel(logging.DEBUG)
        # this is expected if we are using --no-check-certificate
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    def close(self):
        # NOTE(review): the `def close(self):` header was dropped in this copy
        # and has been restored.
        self._clear_instances()
        # Remove the logging handler that contains a reference to our logger
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
        logging.getLogger('urllib3').removeHandler(self.__logging_handler)

    def _check_extensions(self, extensions):
        super()._check_extensions(extensions)
        # These extensions are handled by this handler itself.
        extensions.pop('cookiejar', None)
        extensions.pop('timeout', None)

    def _create_instance(self, cookiejar):
        session = RequestsSession()
        http_adapter = RequestsHTTPAdapter(
            ssl_context=self._make_sslcontext(),
            source_address=self.source_address,
            max_retries=urllib3.util.retry.Retry(False),
        )
        # Drop the default adapters and mount ours for both schemes.
        session.adapters.clear()
        session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})
        session.mount('https://', http_adapter)
        session.mount('http://', http_adapter)
        session.cookies = cookiejar
        session.trust_env = False  # no need, we already load proxies from env
        return session

    def _send(self, request):
        headers = self._merge_headers(request.headers)
        add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)

        max_redirects_exceeded = False

        session = self._get_instance(cookiejar=self._get_cookiejar(request))

        # NOTE(review): the `try:` header and the url/data/headers/stream
        # keyword arguments were dropped in this copy and have been restored —
        # confirm against upstream.
        try:
            requests_res = session.request(
                method=request.method,
                url=request.url,
                data=request.data,
                headers=headers,
                timeout=self._calculate_timeout(request),
                proxies=self._get_proxies(request),
                allow_redirects=True,
                stream=True,
            )

        except requests.exceptions.TooManyRedirects as e:
            max_redirects_exceeded = True
            requests_res = e.response

        except requests.exceptions.SSLError as e:
            if 'CERTIFICATE_VERIFY_FAILED' in str(e):
                raise CertificateVerifyError(cause=e) from e
            raise SSLError(cause=e) from e

        except requests.exceptions.ProxyError as e:
            raise ProxyError(cause=e) from e

        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # Catch any urllib3 exceptions that may leak through
            raise TransportError(cause=e) from e

        except requests.exceptions.RequestException as e:
            # Miscellaneous Requests exceptions. May not necessarily be network related e.g. InvalidURL
            raise RequestError(cause=e) from e

        res = RequestsResponseAdapter(requests_res)

        if not 200 <= res.status < 300:
            raise HTTPError(res, redirect_loop=max_redirects_exceeded)

        return res
@register_preference(RequestsRH)
def requests_preference(rh, request):
    """Preference score for selecting this handler over alternatives."""
    # NOTE(review): the return value was dropped in this copy of the file;
    # upstream returns 100 — confirm.
    return 100
# Use our socks proxy implementation with requests to avoid an extra dependency.
class SocksHTTPConnection(urllib3.connection.HTTPConnection):
    def __init__(self, _socks_options, *args, **kwargs):  # must use _socks_options to pass PoolKey checks
        self._proxy_args = _socks_options
        super().__init__(*args, **kwargs)

    def _new_conn(self):
        # NOTE(review): the method header, `try:`, and the final `except`
        # header were dropped in this copy and have been restored — confirm
        # the final exception type against upstream.
        try:
            # Connect to the SOCKS proxy; the target host/port are tunnelled
            # through the proxy handshake socket factory.
            return create_connection(
                address=(self._proxy_args['addr'], self._proxy_args['port']),
                timeout=self.timeout,
                source_address=self.source_address,
                _create_socket_func=functools.partial(
                    create_socks_proxy_socket, (self.host, self.port), self._proxy_args))
        except (socket.timeout, TimeoutError) as e:
            raise urllib3.exceptions.ConnectTimeoutError(
                self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
        except SocksProxyError as e:
            raise urllib3.exceptions.ProxyError(str(e), e) from e
        except OSError as e:
            raise urllib3.exceptions.NewConnectionError(
                self, f'Failed to establish a new connection: {e}') from e
class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):
    # Socket creation comes from SocksHTTPConnection (first in MRO); TLS setup
    # comes from urllib3's HTTPSConnection.
    # NOTE(review): the class body was dropped in this copy of the file; a
    # `pass` body has been restored — confirm against upstream.
    pass
class SocksHTTPConnectionPool(urllib3.HTTPConnectionPool):
    # Plain-HTTP pool that hands out SOCKS-tunnelled connections.
    ConnectionCls = SocksHTTPConnection
class SocksHTTPSConnectionPool(urllib3.HTTPSConnectionPool):
    # HTTPS pool that hands out SOCKS-tunnelled TLS connections.
    ConnectionCls = SocksHTTPSConnection
class SocksProxyManager(urllib3.PoolManager):
    """PoolManager that routes http/https through our SOCKS connection pools."""

    def __init__(self, socks_proxy, username=None, password=None, num_pools=10, headers=None, **connection_pool_kw):
        # _socks_options rides along in connection_pool_kw so it reaches
        # SocksHTTPConnection.__init__ via the pool's connection kwargs.
        connection_pool_kw['_socks_options'] = make_socks_proxy_opts(socks_proxy)
        super().__init__(num_pools, headers, **connection_pool_kw)
        # NOTE(review): the closing brace of this dict was dropped in this
        # copy of the file and has been restored.
        self.pool_classes_by_scheme = {
            'http': SocksHTTPConnectionPool,
            'https': SocksHTTPSConnectionPool,
        }
# Make requests use our SOCKS implementation for socks proxies as well.
requests.adapters.SOCKSProxyManager = SocksProxyManager