]>
jfr.im git - yt-dlp.git/blob - yt_dlp/networking/common.py
1 from __future__
import annotations
11 import urllib
.response
12 from collections
.abc
import Iterable
, Mapping
13 from email
.message
import Message
14 from http
import HTTPStatus
15 from http
.cookiejar
import CookieJar
17 from ._helper
import make_ssl_context
, wrap_request_errors
18 from .exceptions
import (
31 from ..utils
.networking
import HTTPHeaderDict
33 if typing
.TYPE_CHECKING
:
34 RequestData
= bytes | Iterable
[bytes] | typing
.IO |
None
37 class RequestDirector
:
38 """RequestDirector class
40 Helper class that, when given a request, forwards it to a RequestHandler that supports it.
42 @param logger: Logger instance.
43 @param verbose: Print debug request information to stdout.
def __init__(self, logger, verbose=False):
    """Set up a director that has no handlers registered yet.

    @param logger: Logger instance.
    @param verbose: Print debug request information to stdout.
    """
    self.verbose = verbose
    self.logger = logger  # TODO(Grub4k): default logger
    # Maps a handler's RH_KEY to the handler instance; filled by add_handler().
    self.handlers: dict[str, RequestHandler] = {}
52 for handler
in self
.handlers
.values():
def add_handler(self, handler: RequestHandler):
    """Add a handler instance, replacing any existing handler that has the same RH_KEY."""
    assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
    key = handler.RH_KEY
    self.handlers[key] = handler
60 def _print_verbose(self
, msg
):
62 self
.logger
.stdout(f
'director: {msg}')
64 def send(self
, request
: Request
) -> Response
:
66 Passes a request onto a suitable RequestHandler
69 raise RequestError('No request handlers configured')
71 assert isinstance(request
, Request
)
73 unexpected_errors
= []
74 unsupported_errors
= []
75 # TODO (future): add a per-request preference system
76 for handler
in reversed(list(self
.handlers
.values())):
77 self
._print
_verbose
(f
'Checking if "{handler.RH_NAME}" supports this request.')
79 handler
.validate(request
)
80 except UnsupportedRequest
as e
:
82 f
'"{handler.RH_NAME}" cannot handle this request (reason: {error_to_str(e)})')
83 unsupported_errors
.append(e
)
86 self
._print
_verbose
(f
'Sending request via "{handler.RH_NAME}"')
88 response
= handler
.send(request
)
91 except Exception as e
:
93 f
'[{handler.RH_NAME}] Unexpected error: {error_to_str(e)}{bug_reports_message()}',
95 unexpected_errors
.append(e
)
98 assert isinstance(response
, Response
)
101 raise NoSupportingHandlers(unsupported_errors
, unexpected_errors
)
104 _REQUEST_HANDLERS
= {}
def register(handler):
    """Register a RequestHandler class in the module-level registry.

    The class is stored in _REQUEST_HANDLERS under its RH_KEY and is then
    returned unchanged, so this function can be applied as a class decorator.

    @param handler: RequestHandler subclass to register.
    @returns the same handler class that was passed in.
    @raises AssertionError if handler is not a RequestHandler subclass, or if
            a handler with the same RH_KEY is already registered.
    """
    assert issubclass(handler, RequestHandler), f'{handler} must be a subclass of RequestHandler'
    assert handler.RH_KEY not in _REQUEST_HANDLERS, f'RequestHandler {handler.RH_KEY} already registered'
    _REQUEST_HANDLERS[handler.RH_KEY] = handler
    # Returning the class keeps `@register` from rebinding the decorated name to None.
    return handler
class Features(enum.Enum):
    """Optional proxy features that a request handler may declare support for."""

    # The "all" key of a proxies dict (fallback proxy for every protocol).
    ALL_PROXY = enum.auto()
    # The "no" key of a proxies dict (hosts that must not go through a proxy).
    NO_PROXY = enum.auto()
120 class RequestHandler(abc
.ABC
):
122 """Request Handler class
124 Request handlers are classes that, given a Request,
125 process the request from start to finish and return a Response.
127 Concrete subclasses need to redefine the _send(request) method,
128 which handles the underlying request logic and returns a Response.
130 RH_NAME class variable may contain a display name for the RequestHandler.
131 By default, this is generated from the class name.
133 The concrete request handler MUST have "RH" as the suffix in the class name.
135 All exceptions raised by a RequestHandler should be an instance of RequestError.
136 Any other exception raised will be treated as a handler issue.
138 If a Request is not supported by the handler, an UnsupportedRequest
139 should be raised with a reason.
141 By default, some checks are done on the request in _validate() based on the following class variables:
142 - `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
143 Any Request with an url scheme not in this list will raise an UnsupportedRequest.
145 - `_SUPPORTED_PROXY_SCHEMES`: a tuple of supported proxy url schemes. Any Request that contains
146 a proxy url with an url scheme not in this list will raise an UnsupportedRequest.
148 - `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.
149 The above may be set to None to disable the checks.
152 @param logger: logger instance
153 @param headers: HTTP Headers to include when sending requests.
154 @param cookiejar: Cookiejar to use for requests.
155 @param timeout: Socket timeout to use when sending requests.
156 @param proxies: Proxies to use for sending requests.
157 @param source_address: Client-side IP address to bind to for requests.
158 @param verbose: Print debug request and traffic information to stdout.
159 @param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
160 @param client_cert: SSL client certificate configuration.
161 dict with {client_certificate, client_certificate_key, client_certificate_password}
162 @param verify: Verify SSL certificates
163 @param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.
165 Some configuration options may be available for individual Requests too. In this case,
166 either the Request configuration option takes precedence or they are merged.
168 Requests may have additional optional parameters defined as extensions.
169 RequestHandler subclasses may choose to support custom extensions.
171 The following extensions are defined for RequestHandler:
172 - `cookiejar`: Cookiejar to use for this request
173 - `timeout`: socket timeout to use for this request
175 Apart from the url protocol, proxies dict may contain the following keys:
176 - `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
177 - `no`: comma separated list of hostnames (optionally with port) to not use a proxy for.
178 Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.
182 _SUPPORTED_URL_SCHEMES
= ()
183 _SUPPORTED_PROXY_SCHEMES
= ()
184 _SUPPORTED_FEATURES
= ()
188 logger
, # TODO(Grub4k): default logger
189 headers
: HTTPHeaderDict
= None,
190 cookiejar
: CookieJar
= None,
191 timeout
: float |
int |
None = None,
192 proxies
: dict = None,
193 source_address
: str = None,
194 verbose
: bool = False,
195 prefer_system_certs
: bool = False,
196 client_cert
: dict[str, str |
None] = None,
198 legacy_ssl_support
: bool = False,
202 self
._logger
= logger
203 self
.headers
= headers
or {}
204 self
.cookiejar
= cookiejar
if cookiejar
is not None else CookieJar()
205 self
.timeout
= float(timeout
or 20)
206 self
.proxies
= proxies
or {}
207 self
.source_address
= source_address
208 self
.verbose
= verbose
209 self
.prefer_system_certs
= prefer_system_certs
210 self
._client
_cert
= client_cert
or {}
212 self
.legacy_ssl_support
= legacy_ssl_support
215 def _make_sslcontext(self
):
216 return make_ssl_context(
218 legacy_support
=self
.legacy_ssl_support
,
219 use_certifi
=not self
.prefer_system_certs
,
def _merge_headers(self, request_headers):
    """Return a new HTTPHeaderDict built from this handler's headers and then request_headers."""
    handler_headers = self.headers
    return HTTPHeaderDict(handler_headers, request_headers)
def _check_url_scheme(self, request: Request):
    """Check the scheme of request.url against _SUPPORTED_URL_SCHEMES.

    The check is skipped when _SUPPORTED_URL_SCHEMES is None.

    @returns the lower-cased url scheme, for further processing.
    @raises UnsupportedRequest if the scheme is not in _SUPPORTED_URL_SCHEMES.
    """
    scheme = urllib.parse.urlparse(request.url).scheme.lower()
    supported = self._SUPPORTED_URL_SCHEMES
    if supported is None or scheme in supported:
        return scheme  # for further processing
    raise UnsupportedRequest(f'Unsupported url scheme: "{scheme}"')
232 def _check_proxies(self
, proxies
):
233 for proxy_key
, proxy_url
in proxies
.items():
234 if proxy_url
is None:
236 if proxy_key
== 'no':
237 if self
._SUPPORTED
_FEATURES
is not None and Features
.NO_PROXY
not in self
._SUPPORTED
_FEATURES
:
238 raise UnsupportedRequest('"no" proxy is not supported')
242 and self
._SUPPORTED
_FEATURES
is not None
243 and Features
.ALL_PROXY
not in self
._SUPPORTED
_FEATURES
245 raise UnsupportedRequest('"all" proxy is not supported')
247 # Unlikely this handler will use this proxy, so ignore.
248 # This is to allow a case where a proxy may be set for a protocol
249 # for one handler in which such protocol (and proxy) is not supported by another handler.
250 if self
._SUPPORTED
_URL
_SCHEMES
is not None and proxy_key
not in (*self
._SUPPORTED
_URL
_SCHEMES
, 'all'):
253 if self
._SUPPORTED
_PROXY
_SCHEMES
is None:
254 # Skip proxy scheme checks
257 # Scheme-less proxies are not supported
258 if urllib
.request
._parse
_proxy
(proxy_url
)[0] is None:
259 raise UnsupportedRequest(f
'Proxy "{proxy_url}" missing scheme')
261 scheme
= urllib
.parse
.urlparse(proxy_url
).scheme
.lower()
262 if scheme
not in self
._SUPPORTED
_PROXY
_SCHEMES
:
263 raise UnsupportedRequest(f
'Unsupported proxy type: "{scheme}"')
265 def _check_cookiejar_extension(self
, extensions
):
266 if not extensions
.get('cookiejar'):
268 if not isinstance(extensions
['cookiejar'], CookieJar
):
269 raise UnsupportedRequest('cookiejar is not a CookieJar')
271 def _check_timeout_extension(self
, extensions
):
272 if extensions
.get('timeout') is None:
274 if not isinstance(extensions
['timeout'], (float, int)):
275 raise UnsupportedRequest('timeout is not a float or int')
def _check_extensions(self, extensions):
    """Run the cookiejar check and then the timeout check on the extensions dict."""
    for check in (self._check_cookiejar_extension, self._check_timeout_extension):
        check(extensions)
def _validate(self, request):
    """Run the default checks in order: url scheme, proxies, extensions.

    The request's own proxies are checked when they are set (truthy);
    otherwise the handler's proxies are checked instead.
    """
    self._check_url_scheme(request)
    effective_proxies = request.proxies or self.proxies
    self._check_proxies(effective_proxies)
    self._check_extensions(request.extensions)
def validate(self, request: Request):
    """Check the request with _validate().

    @raises TypeError if request is not a Request instance.
    Any exception raised by _validate() propagates to the caller.
    """
    if isinstance(request, Request):
        self._validate(request)
        return
    raise TypeError('Expected an instance of Request')
def send(self, request: Request) -> Response:
    """Pass the request to _send() and return whatever it returns.

    @raises TypeError if request is not a Request instance.
    """
    if isinstance(request, Request):
        return self._send(request)
    raise TypeError('Expected an instance of Request')
def _send(self, request: Request):
    """Handle a request from start to finish. Redefine in subclasses.

    send() calls this method once it has checked that request is a Request
    instance, and returns its result to the caller. Concrete subclasses are
    expected to return a Response.
    """
307 return cls
.__name
__[:-2]
311 assert cls
.__name
__.endswith('RH'), 'RequestHandler class names must end with "RH"'
312 return cls
.__name
__[:-2]
317 def __exit__(self
, *args
):
323 Represents a request to be made.
324 Partially backwards-compatible with urllib.request.Request.
326 @param url: url to send. Will be sanitized.
327 @param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
328 @param headers: headers to send.
329 @param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
330 @param query: URL query parameters to update the url with.
331 @param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
332 @param extensions: Dictionary of Request extensions to add, as supported by handlers.
338 data
: RequestData
= None,
339 headers
: typing
.Mapping
= None,
340 proxies
: dict = None,
343 extensions
: dict = None
346 self
._headers
= HTTPHeaderDict()
350 url
= update_url_query(url
, query
)
355 self
.headers
= headers
356 self
.data
= data
# note: must be done after setting headers
357 self
.proxies
= proxies
or {}
358 self
.extensions
= extensions
or {}
366 if not isinstance(url
, str):
367 raise TypeError('url must be a string')
368 elif url
.startswith('//'):
370 self
._url
= escape_url(url
)
374 return self
._method
or ('POST' if self
.data
is not None else 'GET')
377 def method(self
, method
):
380 elif isinstance(method
, str):
381 self
._method
= method
.upper()
383 raise TypeError('method must be a string')
390 def data(self
, data
: RequestData
):
391 # Try catch some common mistakes
392 if data
is not None and (
393 not isinstance(data
, (bytes, io
.IOBase
, Iterable
)) or isinstance(data
, (str, Mapping
))
395 raise TypeError('data must be bytes, iterable of bytes, or a file-like object')
397 if data
== self
._data
and self
._data
is None:
398 self
.headers
.pop('Content-Length', None)
400 # https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
401 if data
!= self
._data
:
402 if self
._data
is not None:
403 self
.headers
.pop('Content-Length', None)
406 if self
._data
is None:
407 self
.headers
.pop('Content-Type', None)
409 if 'Content-Type' not in self
.headers
and self
._data
is not None:
410 self
.headers
['Content-Type'] = 'application/x-www-form-urlencoded'
413 def headers(self
) -> HTTPHeaderDict
:
417 def headers(self
, new_headers
: Mapping
):
418 """Replaces headers of the request. If not an HTTPHeaderDict, it will be converted to one."""
419 if isinstance(new_headers
, HTTPHeaderDict
):
420 self
._headers
= new_headers
421 elif isinstance(new_headers
, Mapping
):
422 self
._headers
= HTTPHeaderDict(new_headers
)
424 raise TypeError('headers must be a mapping')
def update(self, url=None, data=None, headers=None, query=None):
    """Update the request in place; arguments left as None keep the current value.

    @param url: replacement url. A falsy value keeps the current url.
    @param data: replacement payload. Only None keeps the current payload, so
                 an empty payload such as b'' replaces the existing one.
    @param headers: headers merged into the existing request headers.
    @param query: URL query parameters to update the url with.
    """
    # Compare against None instead of truthiness: b'' is a valid (empty) payload
    # and must not be silently swapped for the previous data.
    self.data = data if data is not None else self.data
    self.headers.update(headers or {})
    self.url = update_url_query(url or self.url, query or {})
432 return self
.__class
__(
434 headers
=copy
.deepcopy(self
.headers
),
435 proxies
=copy
.deepcopy(self
.proxies
),
437 extensions
=copy
.copy(self
.extensions
),
# Convenience constructors that build a Request with the HTTP method pre-set.
# These are functools.partial objects rather than subclasses, so calling them
# returns a plain Request and they cannot be used with isinstance().
HEADRequest = functools.partial(Request, method='HEAD')
PUTRequest = functools.partial(Request, method='PUT')
446 class Response(io
.IOBase
):
448 Base class for HTTP response adapters.
450 By default, it provides a basic wrapper for a file-like response object.
452 Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.
454 @param fp: Original, file-like, response.
455 @param url: URL that this is a response of.
456 @param headers: response headers.
457 @param status: Response HTTP status code. Default is 200 OK.
458 @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
465 headers
: Mapping
[str, str],
470 self
.headers
= Message()
471 for name
, value
in headers
.items():
472 self
.headers
.add_header(name
, value
)
476 self
.reason
= reason
or HTTPStatus(status
).phrase
481 return self
.fp
.readable()
483 def read(self
, amt
: int = None) -> bytes:
484 # Expected errors raised here should be of type RequestError or subclasses.
485 # Subclasses should redefine this method with more precise error handling.
487 return self
.fp
.read(amt
)
488 except Exception as e
:
489 raise TransportError(cause
=e
) from e
493 return super().close()
495 def get_header(self
, name
, default
=None):
496 """Get header for name.
497 If there are multiple matching headers, return all separated by comma."""
498 headers
= self
.headers
.get_all(name
)
501 if name
.title() == 'Set-Cookie':
502 # Special case, only get the first one
503 # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
505 return ', '.join(headers
)
507 # The following methods are for compatibility reasons and are deprecated
def getheader(self, name, default=None):
    """Deprecated compatibility alias that forwards to get_header()."""
    value = self.get_header(name, default)
    return value