]>
jfr.im git - yt-dlp.git/blob - yt_dlp/networking/common.py
1 from __future__
import annotations
11 import urllib
.response
12 from collections
.abc
import Iterable
, Mapping
13 from email
.message
import Message
14 from http
import HTTPStatus
15 from http
.cookiejar
import CookieJar
17 from ._helper
import make_ssl_context
, wrap_request_errors
18 from .exceptions
import (
24 from ..compat
.types
import NoneType
33 from ..utils
.networking
import HTTPHeaderDict
35 if typing
.TYPE_CHECKING
:
36 RequestData
= bytes | Iterable
[bytes] | typing
.IO |
None
39 class RequestDirector
:
40 """RequestDirector class
42 Helper class that, when given a request, forwards it to a RequestHandler that supports it.
44 @param logger: Logger instance.
45 @param verbose: Print debug request information to stdout.
def __init__(self, logger, verbose=False):
    """Create a director that starts out with no handlers registered.

    @param logger: Logger instance.
    @param verbose: Print debug request information to stdout.
    """
    self.verbose = verbose
    self.logger = logger  # TODO(Grub4k): default logger
    # Filled in later through add_handler(), which keys each handler by its RH_KEY.
    self.handlers: dict[str, RequestHandler] = {}
54 for handler
in self
.handlers
.values():
def add_handler(self, handler: RequestHandler):
    """Register ``handler`` under its RH_KEY.

    A handler already stored under the same RH_KEY is replaced.
    """
    assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
    key = handler.RH_KEY
    self.handlers[key] = handler
62 def _print_verbose(self
, msg
):
64 self
.logger
.stdout(f
'director: {msg}')
66 def send(self
, request
: Request
) -> Response
:
68 Passes a request onto a suitable RequestHandler
71 raise RequestError('No request handlers configured')
73 assert isinstance(request
, Request
)
75 unexpected_errors
= []
76 unsupported_errors
= []
77 # TODO (future): add a per-request preference system
78 for handler
in reversed(list(self
.handlers
.values())):
79 self
._print
_verbose
(f
'Checking if "{handler.RH_NAME}" supports this request.')
81 handler
.validate(request
)
82 except UnsupportedRequest
as e
:
84 f
'"{handler.RH_NAME}" cannot handle this request (reason: {error_to_str(e)})')
85 unsupported_errors
.append(e
)
88 self
._print
_verbose
(f
'Sending request via "{handler.RH_NAME}"')
90 response
= handler
.send(request
)
93 except Exception as e
:
95 f
'[{handler.RH_NAME}] Unexpected error: {error_to_str(e)}{bug_reports_message()}',
97 unexpected_errors
.append(e
)
100 assert isinstance(response
, Response
)
103 raise NoSupportingHandlers(unsupported_errors
, unexpected_errors
)
106 _REQUEST_HANDLERS
= {}
109 def register_rh(handler
):
110 """Register a RequestHandler class"""
111 assert issubclass(handler
, RequestHandler
), f
'{handler} must be a subclass of RequestHandler'
112 assert handler
.RH_KEY
not in _REQUEST_HANDLERS
, f
'RequestHandler {handler.RH_KEY} already registered'
113 _REQUEST_HANDLERS
[handler
.RH_KEY
] = handler
class Features(enum.Enum):
    """Optional proxy capabilities a RequestHandler may list in `_SUPPORTED_FEATURES`."""

    # Handler accepts the "all" key of the proxies dict.
    ALL_PROXY = enum.auto()

    # Handler accepts the "no" key of the proxies dict.
    NO_PROXY = enum.auto()
122 class RequestHandler(abc
.ABC
):
124 """Request Handler class
126 Request handlers are classes that, given a Request,
127 process the request from start to finish and return a Response.
129 Concrete subclasses need to redefine the _send(request) method,
130 which handles the underlying request logic and returns a Response.
132 RH_NAME class variable may contain a display name for the RequestHandler.
133 By default, this is generated from the class name.
135 The concrete request handler MUST have "RH" as the suffix in the class name.
137 All exceptions raised by a RequestHandler should be an instance of RequestError.
138 Any other exception raised will be treated as a handler issue.
140 If a Request is not supported by the handler, an UnsupportedRequest
141 should be raised with a reason.
143 By default, some checks are done on the request in _validate() based on the following class variables:
144 - `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
145 Any Request with an url scheme not in this list will raise an UnsupportedRequest.
147 - `_SUPPORTED_PROXY_SCHEMES`: a tuple of supported proxy url schemes. Any Request that contains
148 a proxy url with an url scheme not in this list will raise an UnsupportedRequest.
150 - `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.
152 The above may be set to None to disable the checks.
155 @param logger: logger instance
156 @param headers: HTTP Headers to include when sending requests.
157 @param cookiejar: Cookiejar to use for requests.
158 @param timeout: Socket timeout to use when sending requests.
159 @param proxies: Proxies to use for sending requests.
160 @param source_address: Client-side IP address to bind to for requests.
161 @param verbose: Print debug request and traffic information to stdout.
162 @param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
163 @param client_cert: SSL client certificate configuration.
164 dict with {client_certificate, client_certificate_key, client_certificate_password}
165 @param verify: Verify SSL certificates
166 @param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.
168 Some configuration options may be available for individual Requests too. In this case,
169 either the Request configuration option takes precedence or they are merged.
171 Requests may have additional optional parameters defined as extensions.
172 RequestHandler subclasses may choose to support custom extensions.
174 If an extension is supported, subclasses should extend _check_extensions(extensions)
175 to pop and validate the extension.
176 - Extensions left in `extensions` are treated as unsupported and UnsupportedRequest will be raised.
178 The following extensions are defined for RequestHandler:
179 - `cookiejar`: Cookiejar to use for this request.
180 - `timeout`: socket timeout to use for this request.
181 To enable these, add extensions.pop('<extension>', None) to _check_extensions
183 Apart from the url protocol, proxies dict may contain the following keys:
184 - `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
185 - `no`: comma separated list of hostnames (optionally with port) to not use a proxy for.
186 Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.
190 _SUPPORTED_URL_SCHEMES
= ()
191 _SUPPORTED_PROXY_SCHEMES
= ()
192 _SUPPORTED_FEATURES
= ()
196 logger
, # TODO(Grub4k): default logger
197 headers
: HTTPHeaderDict
= None,
198 cookiejar
: CookieJar
= None,
199 timeout
: float |
int |
None = None,
200 proxies
: dict = None,
201 source_address
: str = None,
202 verbose
: bool = False,
203 prefer_system_certs
: bool = False,
204 client_cert
: dict[str, str |
None] = None,
206 legacy_ssl_support
: bool = False,
210 self
._logger
= logger
211 self
.headers
= headers
or {}
212 self
.cookiejar
= cookiejar
if cookiejar
is not None else CookieJar()
213 self
.timeout
= float(timeout
or 20)
214 self
.proxies
= proxies
or {}
215 self
.source_address
= source_address
216 self
.verbose
= verbose
217 self
.prefer_system_certs
= prefer_system_certs
218 self
._client
_cert
= client_cert
or {}
220 self
.legacy_ssl_support
= legacy_ssl_support
223 def _make_sslcontext(self
):
224 return make_ssl_context(
226 legacy_support
=self
.legacy_ssl_support
,
227 use_certifi
=not self
.prefer_system_certs
,
def _merge_headers(self, request_headers):
    """Build one HTTPHeaderDict from the handler headers and ``request_headers``.

    The handler's own headers are passed first, the request's headers second.
    """
    merged = HTTPHeaderDict(self.headers, request_headers)
    return merged
def _check_url_scheme(self, request: Request):
    """Return the lower-cased url scheme of ``request`` if this handler accepts it.

    The check is skipped when `_SUPPORTED_URL_SCHEMES` is None.

    @raises UnsupportedRequest: the scheme is not listed in `_SUPPORTED_URL_SCHEMES`.
    """
    parsed_url = urllib.parse.urlparse(request.url)
    scheme = parsed_url.scheme.lower()
    supported = self._SUPPORTED_URL_SCHEMES
    if supported is None or scheme in supported:
        # Hand the scheme back so callers can use it for further processing.
        return scheme
    raise UnsupportedRequest(f'Unsupported url scheme: "{scheme}"')
240 def _check_proxies(self
, proxies
):
241 for proxy_key
, proxy_url
in proxies
.items():
242 if proxy_url
is None:
244 if proxy_key
== 'no':
245 if self
._SUPPORTED
_FEATURES
is not None and Features
.NO_PROXY
not in self
._SUPPORTED
_FEATURES
:
246 raise UnsupportedRequest('"no" proxy is not supported')
250 and self
._SUPPORTED
_FEATURES
is not None
251 and Features
.ALL_PROXY
not in self
._SUPPORTED
_FEATURES
253 raise UnsupportedRequest('"all" proxy is not supported')
255 # Unlikely this handler will use this proxy, so ignore.
256 # This is to allow a case where a proxy may be set for a protocol
257 # for one handler in which such protocol (and proxy) is not supported by another handler.
258 if self
._SUPPORTED
_URL
_SCHEMES
is not None and proxy_key
not in (*self
._SUPPORTED
_URL
_SCHEMES
, 'all'):
261 if self
._SUPPORTED
_PROXY
_SCHEMES
is None:
262 # Skip proxy scheme checks
265 # Scheme-less proxies are not supported
266 if urllib
.request
._parse
_proxy
(proxy_url
)[0] is None:
267 raise UnsupportedRequest(f
'Proxy "{proxy_url}" missing scheme')
269 scheme
= urllib
.parse
.urlparse(proxy_url
).scheme
.lower()
270 if scheme
not in self
._SUPPORTED
_PROXY
_SCHEMES
:
271 raise UnsupportedRequest(f
'Unsupported proxy type: "{scheme}"')
def _check_extensions(self, extensions):
    """Sanity-check the types of the built-in extensions. Subclasses should extend this.

    Only asserts on the values; nothing is removed from ``extensions`` here.
    """
    # Each built-in extension is optional, hence NoneType is always accepted.
    accepted_types = {
        'cookiejar': (CookieJar, NoneType),
        'timeout': (float, int, NoneType),
    }
    for extension_name, types in accepted_types.items():
        assert isinstance(extensions.get(extension_name), types)
278 def _validate(self
, request
):
279 self
._check
_url
_scheme
(request
)
280 self
._check
_proxies
(request
.proxies
or self
.proxies
)
281 extensions
= request
.extensions
.copy()
282 self
._check
_extensions
(extensions
)
284 # TODO: add support for optional extensions
285 raise UnsupportedRequest(f
'Unsupported extensions: {", ".join(extensions.keys())}')
288 def validate(self
, request
: Request
):
289 if not isinstance(request
, Request
):
290 raise TypeError('Expected an instance of Request')
291 self
._validate
(request
)
294 def send(self
, request
: Request
) -> Response
:
295 if not isinstance(request
, Request
):
296 raise TypeError('Expected an instance of Request')
297 return self
._send
(request
)
300 def _send(self
, request
: Request
):
301 """Handle a request from start to finish. Redefine in subclasses."""
308 return cls
.__name
__[:-2]
312 assert cls
.__name
__.endswith('RH'), 'RequestHandler class names must end with "RH"'
313 return cls
.__name
__[:-2]
318 def __exit__(self
, *args
):
324 Represents a request to be made.
325 Partially backwards-compatible with urllib.request.Request.
327 @param url: url to send. Will be sanitized.
328 @param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
329 @param headers: headers to send.
330 @param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
331 @param query: URL query parameters to update the url with.
332 @param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
333 @param extensions: Dictionary of Request extensions to add, as supported by handlers.
339 data
: RequestData
= None,
340 headers
: typing
.Mapping
= None,
341 proxies
: dict = None,
344 extensions
: dict = None
347 self
._headers
= HTTPHeaderDict()
351 url
= update_url_query(url
, query
)
356 self
.headers
= headers
357 self
.data
= data
# note: must be done after setting headers
358 self
.proxies
= proxies
or {}
359 self
.extensions
= extensions
or {}
367 if not isinstance(url
, str):
368 raise TypeError('url must be a string')
369 elif url
.startswith('//'):
371 self
._url
= escape_url(url
)
375 return self
._method
or ('POST' if self
.data
is not None else 'GET')
378 def method(self
, method
):
381 elif isinstance(method
, str):
382 self
._method
= method
.upper()
384 raise TypeError('method must be a string')
391 def data(self
, data
: RequestData
):
392 # Try catch some common mistakes
393 if data
is not None and (
394 not isinstance(data
, (bytes, io
.IOBase
, Iterable
)) or isinstance(data
, (str, Mapping
))
396 raise TypeError('data must be bytes, iterable of bytes, or a file-like object')
398 if data
== self
._data
and self
._data
is None:
399 self
.headers
.pop('Content-Length', None)
401 # https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
402 if data
!= self
._data
:
403 if self
._data
is not None:
404 self
.headers
.pop('Content-Length', None)
407 if self
._data
is None:
408 self
.headers
.pop('Content-Type', None)
410 if 'Content-Type' not in self
.headers
and self
._data
is not None:
411 self
.headers
['Content-Type'] = 'application/x-www-form-urlencoded'
414 def headers(self
) -> HTTPHeaderDict
:
418 def headers(self
, new_headers
: Mapping
):
419 """Replaces headers of the request. If not an HTTPHeaderDict, it will be converted to one."""
420 if isinstance(new_headers
, HTTPHeaderDict
):
421 self
._headers
= new_headers
422 elif isinstance(new_headers
, Mapping
):
423 self
._headers
= HTTPHeaderDict(new_headers
)
425 raise TypeError('headers must be a mapping')
def update(self, url=None, data=None, headers=None, query=None):
    """Modify this request in place.

    @param url: replacement url; the current url is kept when falsy.
    @param data: replacement payload; the current payload is re-assigned when None.
    @param headers: headers to merge into the existing headers.
    @param query: query parameters passed to update_url_query() together with the url.
    """
    if data is None:
        data = self.data
    # Assigned even when unchanged, exactly like the original expression form.
    self.data = data

    extra_headers = headers or {}
    self.headers.update(extra_headers)

    target_url = url or self.url
    extra_query = query or {}
    self.url = update_url_query(target_url, extra_query)
433 return self
.__class
__(
435 headers
=copy
.deepcopy(self
.headers
),
436 proxies
=copy
.deepcopy(self
.proxies
),
438 extensions
=copy
.copy(self
.extensions
),
# Shorthand constructors: a Request with the HTTP method pre-filled.
HEADRequest = functools.partial(Request, method='HEAD')
PUTRequest = functools.partial(Request, method='PUT')
447 class Response(io
.IOBase
):
449 Base class for HTTP response adapters.
451 By default, it provides a basic wrapper for a file-like response object.
453 Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.
455 @param fp: Original, file-like, response.
456 @param url: URL that this is a response of.
457 @param headers: response headers.
458 @param status: Response HTTP status code. Default is 200 OK.
459 @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
466 headers
: Mapping
[str, str],
471 self
.headers
= Message()
472 for name
, value
in headers
.items():
473 self
.headers
.add_header(name
, value
)
477 self
.reason
= reason
or HTTPStatus(status
).phrase
482 return self
.fp
.readable()
484 def read(self
, amt
: int = None) -> bytes:
485 # Expected errors raised here should be of type RequestError or subclasses.
486 # Subclasses should redefine this method with more precise error handling.
488 return self
.fp
.read(amt
)
489 except Exception as e
:
490 raise TransportError(cause
=e
) from e
494 return super().close()
496 def get_header(self
, name
, default
=None):
497 """Get header for name.
498 If there are multiple matching headers, return all separated by commas."""
499 headers
= self
.headers
.get_all(name
)
502 if name
.title() == 'Set-Cookie':
503 # Special case, only get the first one
504 # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
506 return ', '.join(headers
)
508 # The following methods are for compatibility reasons and are deprecated
511 deprecation_warning('Response.code is deprecated, use Response.status', stacklevel
=2)
515 deprecation_warning('Response.getcode() is deprecated, use Response.status', stacklevel
=2)
519 deprecation_warning('Response.geturl() is deprecated, use Response.url', stacklevel
=2)
523 deprecation_warning('Response.info() is deprecated, use Response.headers', stacklevel
=2)
def getheader(self, name, default=None):
    """Deprecated spelling of get_header(); warns, then delegates to it."""
    deprecation_warning(
        'Response.getheader() is deprecated, use Response.get_header', stacklevel=2)
    value = self.get_header(name, default)
    return value