]>
jfr.im git - yt-dlp.git/blob - yt_dlp/networking/common.py
1 from __future__
import annotations
11 import urllib
.response
12 from collections
.abc
import Iterable
, Mapping
13 from email
.message
import Message
14 from http
import HTTPStatus
15 from http
.cookiejar
import CookieJar
17 from ._helper
import make_ssl_context
, wrap_request_errors
18 from .exceptions
import (
32 from ..utils
.networking
import HTTPHeaderDict
34 if typing
.TYPE_CHECKING
:
35 RequestData
= bytes | Iterable
[bytes] | typing
.IO |
None
38 class RequestDirector
:
39 """RequestDirector class
41 Helper class that, when given a request, forwards it to a RequestHandler that supports it.
43 @param logger: Logger instance.
44 @param verbose: Print debug request information to stdout.
47 def __init__(self
, logger
, verbose
=False):
48 self
.handlers
: dict[str, RequestHandler
] = {}
49 self
.logger
= logger
# TODO(Grub4k): default logger
50 self
.verbose
= verbose
53 for handler
in self
.handlers
.values():
def add_handler(self, handler: RequestHandler):
    """Register `handler` under its RH_KEY, replacing any handler already stored with that key"""
    assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
    registry_key = handler.RH_KEY
    self.handlers[registry_key] = handler
61 def _print_verbose(self
, msg
):
63 self
.logger
.stdout(f
'director: {msg}')
65 def send(self
, request
: Request
) -> Response
:
67 Passes a request onto a suitable RequestHandler
70 raise RequestError('No request handlers configured')
72 assert isinstance(request
, Request
)
74 unexpected_errors
= []
75 unsupported_errors
= []
76 # TODO (future): add a per-request preference system
77 for handler
in reversed(list(self
.handlers
.values())):
78 self
._print
_verbose
(f
'Checking if "{handler.RH_NAME}" supports this request.')
80 handler
.validate(request
)
81 except UnsupportedRequest
as e
:
83 f
'"{handler.RH_NAME}" cannot handle this request (reason: {error_to_str(e)})')
84 unsupported_errors
.append(e
)
87 self
._print
_verbose
(f
'Sending request via "{handler.RH_NAME}"')
89 response
= handler
.send(request
)
92 except Exception as e
:
94 f
'[{handler.RH_NAME}] Unexpected error: {error_to_str(e)}{bug_reports_message()}',
96 unexpected_errors
.append(e
)
99 assert isinstance(response
, Response
)
102 raise NoSupportingHandlers(unsupported_errors
, unexpected_errors
)
105 _REQUEST_HANDLERS
= {}
108 def register(handler
):
109 """Register a RequestHandler class"""
110 assert issubclass(handler
, RequestHandler
), f
'{handler} must be a subclass of RequestHandler'
111 assert handler
.RH_KEY
not in _REQUEST_HANDLERS
, f
'RequestHandler {handler.RH_KEY} already registered'
112 _REQUEST_HANDLERS
[handler
.RH_KEY
] = handler
class Features(enum.Enum):
    """Optional proxy capabilities that a RequestHandler may list in `_SUPPORTED_FEATURES`."""

    # Handler accepts the `all` key of the proxies dict
    ALL_PROXY = enum.auto()
    # Handler accepts the `no` key of the proxies dict
    NO_PROXY = enum.auto()
121 class RequestHandler(abc
.ABC
):
123 """Request Handler class
125 Request handlers are classes that, given a Request,
126 process the request from start to finish and return a Response.
128 Concrete subclasses need to redefine the _send(request) method,
129 which handles the underlying request logic and returns a Response.
131 RH_NAME class variable may contain a display name for the RequestHandler.
132 By default, this is generated from the class name.
134 The concrete request handler MUST have "RH" as the suffix in the class name.
136 All exceptions raised by a RequestHandler should be an instance of RequestError.
137 Any other exception raised will be treated as a handler issue.
139 If a Request is not supported by the handler, an UnsupportedRequest
140 should be raised with a reason.
142 By default, some checks are done on the request in _validate() based on the following class variables:
143 - `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
144 Any Request with an url scheme not in this list will raise an UnsupportedRequest.
146 - `_SUPPORTED_PROXY_SCHEMES`: a tuple of supported proxy url schemes. Any Request that contains
147 a proxy url with an url scheme not in this list will raise an UnsupportedRequest.
149 - `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.
150 The above may be set to None to disable the checks.
153 @param logger: logger instance
154 @param headers: HTTP Headers to include when sending requests.
155 @param cookiejar: Cookiejar to use for requests.
156 @param timeout: Socket timeout to use when sending requests.
157 @param proxies: Proxies to use for sending requests.
158 @param source_address: Client-side IP address to bind to for requests.
159 @param verbose: Print debug request and traffic information to stdout.
160 @param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
161 @param client_cert: SSL client certificate configuration.
162 dict with {client_certificate, client_certificate_key, client_certificate_password}
163 @param verify: Verify SSL certificates
164 @param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.
166 Some configuration options may be available for individual Requests too. In this case,
167 either the Request configuration option takes precedence or they are merged.
169 Requests may have additional optional parameters defined as extensions.
170 RequestHandler subclasses may choose to support custom extensions.
172 The following extensions are defined for RequestHandler:
173 - `cookiejar`: Cookiejar to use for this request
174 - `timeout`: socket timeout to use for this request
176 Apart from the url protocol, proxies dict may contain the following keys:
177 - `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
178 - `no`: comma separated list of hostnames (optionally with port) to not use a proxy for.
179 Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.
183 _SUPPORTED_URL_SCHEMES
= ()
184 _SUPPORTED_PROXY_SCHEMES
= ()
185 _SUPPORTED_FEATURES
= ()
189 logger
, # TODO(Grub4k): default logger
190 headers
: HTTPHeaderDict
= None,
191 cookiejar
: CookieJar
= None,
192 timeout
: float |
int |
None = None,
193 proxies
: dict = None,
194 source_address
: str = None,
195 verbose
: bool = False,
196 prefer_system_certs
: bool = False,
197 client_cert
: dict[str, str |
None] = None,
199 legacy_ssl_support
: bool = False,
203 self
._logger
= logger
204 self
.headers
= headers
or {}
205 self
.cookiejar
= cookiejar
if cookiejar
is not None else CookieJar()
206 self
.timeout
= float(timeout
or 20)
207 self
.proxies
= proxies
or {}
208 self
.source_address
= source_address
209 self
.verbose
= verbose
210 self
.prefer_system_certs
= prefer_system_certs
211 self
._client
_cert
= client_cert
or {}
213 self
.legacy_ssl_support
= legacy_ssl_support
216 def _make_sslcontext(self
):
217 return make_ssl_context(
219 legacy_support
=self
.legacy_ssl_support
,
220 use_certifi
=not self
.prefer_system_certs
,
def _merge_headers(self, request_headers):
    """Build a new HTTPHeaderDict from the handler headers followed by `request_headers`."""
    merged = HTTPHeaderDict(self.headers, request_headers)
    return merged
227 def _check_url_scheme(self
, request
: Request
):
228 scheme
= urllib
.parse
.urlparse(request
.url
).scheme
.lower()
229 if self
._SUPPORTED
_URL
_SCHEMES
is not None and scheme
not in self
._SUPPORTED
_URL
_SCHEMES
:
230 raise UnsupportedRequest(f
'Unsupported url scheme: "{scheme}"')
231 return scheme
# for further processing
233 def _check_proxies(self
, proxies
):
234 for proxy_key
, proxy_url
in proxies
.items():
235 if proxy_url
is None:
237 if proxy_key
== 'no':
238 if self
._SUPPORTED
_FEATURES
is not None and Features
.NO_PROXY
not in self
._SUPPORTED
_FEATURES
:
239 raise UnsupportedRequest('"no" proxy is not supported')
243 and self
._SUPPORTED
_FEATURES
is not None
244 and Features
.ALL_PROXY
not in self
._SUPPORTED
_FEATURES
246 raise UnsupportedRequest('"all" proxy is not supported')
248 # Unlikely this handler will use this proxy, so ignore.
249 # This is to allow a case where a proxy may be set for a protocol
250 # for one handler in which such protocol (and proxy) is not supported by another handler.
251 if self
._SUPPORTED
_URL
_SCHEMES
is not None and proxy_key
not in (*self
._SUPPORTED
_URL
_SCHEMES
, 'all'):
254 if self
._SUPPORTED
_PROXY
_SCHEMES
is None:
255 # Skip proxy scheme checks
258 # Scheme-less proxies are not supported
259 if urllib
.request
._parse
_proxy
(proxy_url
)[0] is None:
260 raise UnsupportedRequest(f
'Proxy "{proxy_url}" missing scheme')
262 scheme
= urllib
.parse
.urlparse(proxy_url
).scheme
.lower()
263 if scheme
not in self
._SUPPORTED
_PROXY
_SCHEMES
:
264 raise UnsupportedRequest(f
'Unsupported proxy type: "{scheme}"')
266 def _check_cookiejar_extension(self
, extensions
):
267 if not extensions
.get('cookiejar'):
269 if not isinstance(extensions
['cookiejar'], CookieJar
):
270 raise UnsupportedRequest('cookiejar is not a CookieJar')
272 def _check_timeout_extension(self
, extensions
):
273 if extensions
.get('timeout') is None:
275 if not isinstance(extensions
['timeout'], (float, int)):
276 raise UnsupportedRequest('timeout is not a float or int')
278 def _check_extensions(self
, extensions
):
279 self
._check
_cookiejar
_extension
(extensions
)
280 self
._check
_timeout
_extension
(extensions
)
282 def _validate(self
, request
):
283 self
._check
_url
_scheme
(request
)
284 self
._check
_proxies
(request
.proxies
or self
.proxies
)
285 self
._check
_extensions
(request
.extensions
)
def validate(self, request: Request):
    """Check `request` with `_validate()` after confirming it is a Request.

    @param request: the Request to check.
    @raises TypeError: if `request` is not a Request instance.
    """
    if isinstance(request, Request):
        self._validate(request)
        return
    raise TypeError('Expected an instance of Request')
def send(self, request: Request) -> Response:
    """Hand `request` to `_send()` after confirming it is a Request.

    @param request: the Request to send.
    @returns: whatever `_send()` returns for the request.
    @raises TypeError: if `request` is not a Request instance.
    """
    if isinstance(request, Request):
        return self._send(request)
    raise TypeError('Expected an instance of Request')
300 def _send(self
, request
: Request
):
301 """Handle a request from start to finish. Redefine in subclasses."""
308 return cls
.__name
__[:-2]
312 assert cls
.__name
__.endswith('RH'), 'RequestHandler class names must end with "RH"'
313 return cls
.__name
__[:-2]
318 def __exit__(self
, *args
):
324 Represents a request to be made.
325 Partially backwards-compatible with urllib.request.Request.
327 @param url: url to send. Will be sanitized.
328 @param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
329 @param headers: headers to send.
330 @param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
331 @param query: URL query parameters to update the url with.
332 @param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
333 @param extensions: Dictionary of Request extensions to add, as supported by handlers.
339 data
: RequestData
= None,
340 headers
: typing
.Mapping
= None,
341 proxies
: dict = None,
344 extensions
: dict = None
347 self
._headers
= HTTPHeaderDict()
351 url
= update_url_query(url
, query
)
356 self
.headers
= headers
357 self
.data
= data
# note: must be done after setting headers
358 self
.proxies
= proxies
or {}
359 self
.extensions
= extensions
or {}
367 if not isinstance(url
, str):
368 raise TypeError('url must be a string')
369 elif url
.startswith('//'):
371 self
._url
= escape_url(url
)
375 return self
._method
or ('POST' if self
.data
is not None else 'GET')
378 def method(self
, method
):
381 elif isinstance(method
, str):
382 self
._method
= method
.upper()
384 raise TypeError('method must be a string')
391 def data(self
, data
: RequestData
):
392 # Try catch some common mistakes
393 if data
is not None and (
394 not isinstance(data
, (bytes, io
.IOBase
, Iterable
)) or isinstance(data
, (str, Mapping
))
396 raise TypeError('data must be bytes, iterable of bytes, or a file-like object')
398 if data
== self
._data
and self
._data
is None:
399 self
.headers
.pop('Content-Length', None)
401 # https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
402 if data
!= self
._data
:
403 if self
._data
is not None:
404 self
.headers
.pop('Content-Length', None)
407 if self
._data
is None:
408 self
.headers
.pop('Content-Type', None)
410 if 'Content-Type' not in self
.headers
and self
._data
is not None:
411 self
.headers
['Content-Type'] = 'application/x-www-form-urlencoded'
414 def headers(self
) -> HTTPHeaderDict
:
418 def headers(self
, new_headers
: Mapping
):
419 """Replaces headers of the request. If not a HTTPHeaderDict, it will be converted to one."""
420 if isinstance(new_headers
, HTTPHeaderDict
):
421 self
._headers
= new_headers
422 elif isinstance(new_headers
, Mapping
):
423 self
._headers
= HTTPHeaderDict(new_headers
)
425 raise TypeError('headers must be a mapping')
def update(self, url=None, data=None, headers=None, query=None):
    """Update this request in place.

    @param url: replacement url; the current url is kept when this is falsy.
    @param data: replacement payload; when None the current payload is assigned back.
    @param headers: headers merged into the existing headers.
    @param query: query parameters applied to the url through update_url_query().
    """
    if data is None:
        data = self.data
    # Always assign, even when unchanged, exactly as the one-line form did
    self.data = data

    if not headers:
        headers = {}
    self.headers.update(headers)

    base_url = url or self.url
    self.url = update_url_query(base_url, query or {})
433 return self
.__class
__(
435 headers
=copy
.deepcopy(self
.headers
),
436 proxies
=copy
.deepcopy(self
.proxies
),
438 extensions
=copy
.copy(self
.extensions
),
# Request factories with the HTTP method pre-set; every other argument is passed through to Request
HEADRequest = functools.partial(Request, method='HEAD')
PUTRequest = functools.partial(Request, method='PUT')
447 class Response(io
.IOBase
):
449 Base class for HTTP response adapters.
451 By default, it provides a basic wrapper for a file-like response object.
453 Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.
455 @param fp: Original, file-like, response.
456 @param url: URL that this is a response of.
457 @param headers: response headers.
458 @param status: Response HTTP status code. Default is 200 OK.
459 @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
466 headers
: Mapping
[str, str],
471 self
.headers
= Message()
472 for name
, value
in headers
.items():
473 self
.headers
.add_header(name
, value
)
477 self
.reason
= reason
or HTTPStatus(status
).phrase
482 return self
.fp
.readable()
484 def read(self
, amt
: int = None) -> bytes:
485 # Expected errors raised here should be of type RequestError or subclasses.
486 # Subclasses should redefine this method with more precise error handling.
488 return self
.fp
.read(amt
)
489 except Exception as e
:
490 raise TransportError(cause
=e
) from e
494 return super().close()
496 def get_header(self
, name
, default
=None):
497 """Get header for name.
498 If there are multiple matching headers, return all separated by a comma."""
499 headers
= self
.headers
.get_all(name
)
502 if name
.title() == 'Set-Cookie':
503 # Special case, only get the first one
504 # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
506 return ', '.join(headers
)
508 # The following methods are for compatibility reasons and are deprecated
511 deprecation_warning('Response.code is deprecated, use Response.status', stacklevel
=2)
515 deprecation_warning('Response.getcode() is deprecated, use Response.status', stacklevel
=2)
519 deprecation_warning('Response.geturl() is deprecated, use Response.url', stacklevel
=2)
523 deprecation_warning('Response.info() is deprecated, use Response.headers', stacklevel
=2)
def getheader(self, name, default=None):
    """Deprecated alias of `get_header()`; emits a deprecation warning and forwards both arguments."""
    deprecation_warning(
        'Response.getheader() is deprecated, use Response.get_header', stacklevel=2)
    header_value = self.get_header(name, default)
    return header_value