]>
jfr.im git - yt-dlp.git/blob - yt_dlp/networking/common.py
1 from __future__
import annotations
11 import urllib
.response
12 from collections
.abc
import Iterable
, Mapping
13 from email
.message
import Message
14 from http
import HTTPStatus
16 from ._helper
import make_ssl_context
, wrap_request_errors
17 from .exceptions
import (
23 from ..compat
.types
import NoneType
24 from ..cookies
import YoutubeDLCookieJar
32 from ..utils
.networking
import HTTPHeaderDict
, normalize_url
35 def register_preference(*handlers
: type[RequestHandler
]):
36 assert all(issubclass(handler
, RequestHandler
) for handler
in handlers
)
38 def outer(preference
: Preference
):
39 @functools.wraps(preference
)
40 def inner(handler
, *args
, **kwargs
):
41 if not handlers
or isinstance(handler
, handlers
):
42 return preference(handler
, *args
, **kwargs
)
44 _RH_PREFERENCES
.add(inner
)
49 class RequestDirector
:
50 """RequestDirector class
52 Helper class that, when given a request, forwards it to a RequestHandler that supports it.
54 Preference functions in the form of func(handler, request) -> int
55 can be registered into the `preferences` set. These are used to sort handlers
56 in order of preference.
58 @param logger: Logger instance.
59 @param verbose: Print debug request information to stdout.
def __init__(self, logger, verbose=False):
    """Create a director with no handlers and no preference functions.

    @param logger: Logger instance, stored unchanged on `self.logger`.
    @param verbose: Verbosity flag, stored unchanged on `self.verbose`.
    """
    # TODO(Grub4k): default logger
    self.logger, self.verbose = logger, verbose
    # Registered handlers; add_handler() stores each one under its RH_KEY.
    self.handlers: dict[str, RequestHandler] = dict()
    # Preference functions used when ordering handlers for a request.
    self.preferences: set[Preference] = set()
69 for handler
in self
.handlers
.values():
def add_handler(self, handler: RequestHandler):
    """Store `handler` under its RH_KEY; a handler already stored under that key is replaced."""
    assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
    self.handlers.update({handler.RH_KEY: handler})
78 def _get_handlers(self
, request
: Request
) -> list[RequestHandler
]:
79 """Sorts handlers by preference, given a request"""
81 rh
: sum(pref(rh
, request
) for pref
in self
.preferences
)
82 for rh
in self
.handlers
.values()
84 self
._print
_verbose
('Handler preferences for this request: %s' % ', '.join(
85 f
'{rh.RH_NAME}={pref}' for rh
, pref
in preferences
.items()))
86 return sorted(self
.handlers
.values(), key
=preferences
.get
, reverse
=True)
88 def _print_verbose(self
, msg
):
90 self
.logger
.stdout(f
'director: {msg}')
92 def send(self
, request
: Request
) -> Response
:
94 Passes a request onto a suitable RequestHandler
97 raise RequestError('No request handlers configured')
99 assert isinstance(request
, Request
)
101 unexpected_errors
= []
102 unsupported_errors
= []
103 for handler
in self
._get
_handlers
(request
):
104 self
._print
_verbose
(f
'Checking if "{handler.RH_NAME}" supports this request.')
106 handler
.validate(request
)
107 except UnsupportedRequest
as e
:
109 f
'"{handler.RH_NAME}" cannot handle this request (reason: {error_to_str(e)})')
110 unsupported_errors
.append(e
)
113 self
._print
_verbose
(f
'Sending request via "{handler.RH_NAME}"')
115 response
= handler
.send(request
)
118 except Exception as e
:
120 f
'[{handler.RH_NAME}] Unexpected error: {error_to_str(e)}{bug_reports_message()}',
122 unexpected_errors
.append(e
)
125 assert isinstance(response
, Response
)
128 raise NoSupportingHandlers(unsupported_errors
, unexpected_errors
)
# Registry of RequestHandler classes, keyed by their RH_KEY.
_REQUEST_HANDLERS = {}


def register_rh(handler):
    """Register a RequestHandler class.

    @param handler: RequestHandler subclass to add to `_REQUEST_HANDLERS` under its RH_KEY.
    @returns: `handler` itself, so the function can also be applied as a class decorator
              without rebinding the decorated name to None.

    Registration is guarded by assertions: `handler` must subclass RequestHandler
    and its RH_KEY must not already be registered.
    """
    assert issubclass(handler, RequestHandler), f'{handler} must be a subclass of RequestHandler'
    assert handler.RH_KEY not in _REQUEST_HANDLERS, f'RequestHandler {handler.RH_KEY} already registered'
    _REQUEST_HANDLERS[handler.RH_KEY] = handler
    return handler
class Features(enum.Enum):
    """Optional proxy capabilities a RequestHandler can list in `_SUPPORTED_FEATURES`."""
    # Handler accepts the `all` key of the proxies dict.
    ALL_PROXY = enum.auto()
    # Handler accepts the `no` key of the proxies dict.
    NO_PROXY = enum.auto()
147 class RequestHandler(abc
.ABC
):
149 """Request Handler class
151 Request handlers are classes that, given a Request,
152 process the request from start to finish and return a Response.
154 Concrete subclasses need to redefine the _send(request) method,
155 which handles the underlying request logic and returns a Response.
157 RH_NAME class variable may contain a display name for the RequestHandler.
158 By default, this is generated from the class name.
160 The concrete request handler MUST have "RH" as the suffix in the class name.
162 All exceptions raised by a RequestHandler should be an instance of RequestError.
163 Any other exception raised will be treated as a handler issue.
165 If a Request is not supported by the handler, an UnsupportedRequest
166 should be raised with a reason.
168 By default, some checks are done on the request in _validate() based on the following class variables:
169 - `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
170 Any Request with an url scheme not in this list will raise an UnsupportedRequest.
172 - `_SUPPORTED_PROXY_SCHEMES`: a tuple of supported proxy url schemes. Any Request that contains
173 a proxy url with an url scheme not in this list will raise an UnsupportedRequest.
175 - `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.
177 The above may be set to None to disable the checks.
180 @param logger: logger instance
181 @param headers: HTTP Headers to include when sending requests.
182 @param cookiejar: Cookiejar to use for requests.
183 @param timeout: Socket timeout to use when sending requests.
184 @param proxies: Proxies to use for sending requests.
185 @param source_address: Client-side IP address to bind to for requests.
186 @param verbose: Print debug request and traffic information to stdout.
187 @param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
188 @param client_cert: SSL client certificate configuration.
189 dict with {client_certificate, client_certificate_key, client_certificate_password}
190 @param verify: Verify SSL certificates
191 @param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.
193 Some configuration options may be available for individual Requests too. In this case,
194 either the Request configuration option takes precedence or they are merged.
196 Requests may have additional optional parameters defined as extensions.
197 RequestHandler subclasses may choose to support custom extensions.
199 If an extension is supported, subclasses should extend _check_extensions(extensions)
200 to pop and validate the extension.
201 - Extensions left in `extensions` are treated as unsupported and UnsupportedRequest will be raised.
203 The following extensions are defined for RequestHandler:
204 - `cookiejar`: Cookiejar to use for this request.
205 - `timeout`: socket timeout to use for this request.
206 To enable these, add extensions.pop('<extension>', None) to _check_extensions
208 Apart from the url protocol, proxies dict may contain the following keys:
209 - `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
210 - `no`: comma separated list of hostnames (optionally with port) to not use a proxy for.
211 Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.
215 _SUPPORTED_URL_SCHEMES
= ()
216 _SUPPORTED_PROXY_SCHEMES
= ()
217 _SUPPORTED_FEATURES
= ()
221 logger
, # TODO(Grub4k): default logger
222 headers
: HTTPHeaderDict
= None,
223 cookiejar
: YoutubeDLCookieJar
= None,
224 timeout
: float |
int |
None = None,
225 proxies
: dict = None,
226 source_address
: str = None,
227 verbose
: bool = False,
228 prefer_system_certs
: bool = False,
229 client_cert
: dict[str, str |
None] = None,
231 legacy_ssl_support
: bool = False,
235 self
._logger
= logger
236 self
.headers
= headers
or {}
237 self
.cookiejar
= cookiejar
if cookiejar
is not None else YoutubeDLCookieJar()
238 self
.timeout
= float(timeout
or 20)
239 self
.proxies
= proxies
or {}
240 self
.source_address
= source_address
241 self
.verbose
= verbose
242 self
.prefer_system_certs
= prefer_system_certs
243 self
._client
_cert
= client_cert
or {}
245 self
.legacy_ssl_support
= legacy_ssl_support
248 def _make_sslcontext(self
):
249 return make_ssl_context(
251 legacy_support
=self
.legacy_ssl_support
,
252 use_certifi
=not self
.prefer_system_certs
,
def _merge_headers(self, request_headers):
    """Combine the handler's headers with `request_headers` into a new HTTPHeaderDict.

    The handler's headers are passed first and the request's second.
    NOTE(review): presumably later arguments take precedence - confirm against HTTPHeaderDict.
    """
    layers = (self.headers, request_headers)
    return HTTPHeaderDict(*layers)
def _calculate_timeout(self, request):
    """Return the timeout for `request` as a float.

    The `timeout` request extension is used when it is truthy; otherwise
    (missing, None or 0) the handler's own `self.timeout` applies.
    """
    chosen = request.extensions.get('timeout')
    if not chosen:
        chosen = self.timeout
    return float(chosen)
def _get_cookiejar(self, request):
    """Return the cookiejar to use for `request`.

    The per-request `cookiejar` extension takes precedence; the handler's own
    cookiejar is used only when the extension is absent or None.
    """
    cookiejar = request.extensions.get('cookiejar')
    # Compare against None instead of relying on truthiness: a cookiejar that
    # defines __len__ (e.g. http.cookiejar.CookieJar) is falsy while it is empty,
    # so `or` would silently replace an explicitly supplied empty jar with the
    # handler's jar.
    return self.cookiejar if cookiejar is None else cookiejar
def _get_proxies(self, request):
    """Return a copy of the proxies for `request`.

    The request's own proxies are used when non-empty; otherwise the handler's.
    A copy is returned, so callers can modify it without touching either source.
    """
    selected = request.proxies
    if not selected:
        selected = self.proxies
    return selected.copy()
def _check_url_scheme(self, request: Request):
    """Check the request's url scheme against `_SUPPORTED_URL_SCHEMES`.

    @returns: the lower-cased url scheme, for further processing.
    @raises UnsupportedRequest: if `_SUPPORTED_URL_SCHEMES` is not None and
            does not contain the scheme.
    """
    scheme = urllib.parse.urlparse(request.url).scheme.lower()
    allowed = self._SUPPORTED_URL_SCHEMES
    # None disables the check entirely.
    if allowed is None or scheme in allowed:
        return scheme
    raise UnsupportedRequest(f'Unsupported url scheme: "{scheme}"')
274 def _check_proxies(self
, proxies
):
275 for proxy_key
, proxy_url
in proxies
.items():
276 if proxy_url
is None:
278 if proxy_key
== 'no':
279 if self
._SUPPORTED
_FEATURES
is not None and Features
.NO_PROXY
not in self
._SUPPORTED
_FEATURES
:
280 raise UnsupportedRequest('"no" proxy is not supported')
284 and self
._SUPPORTED
_FEATURES
is not None
285 and Features
.ALL_PROXY
not in self
._SUPPORTED
_FEATURES
287 raise UnsupportedRequest('"all" proxy is not supported')
289 # Unlikely this handler will use this proxy, so ignore.
290 # This is to allow a case where a proxy may be set for a protocol
291 # for one handler in which such protocol (and proxy) is not supported by another handler.
292 if self
._SUPPORTED
_URL
_SCHEMES
is not None and proxy_key
not in (*self
._SUPPORTED
_URL
_SCHEMES
, 'all'):
295 if self
._SUPPORTED
_PROXY
_SCHEMES
is None:
296 # Skip proxy scheme checks
300 if urllib
.request
._parse
_proxy
(proxy_url
)[0] is None:
301 # Scheme-less proxies are not supported
302 raise UnsupportedRequest(f
'Proxy "{proxy_url}" missing scheme')
303 except ValueError as e
:
304 # parse_proxy may raise on some invalid proxy urls such as "/a/b/c"
305 raise UnsupportedRequest(f
'Invalid proxy url "{proxy_url}": {e}')
307 scheme
= urllib
.parse
.urlparse(proxy_url
).scheme
.lower()
308 if scheme
not in self
._SUPPORTED
_PROXY
_SCHEMES
:
309 raise UnsupportedRequest(f
'Unsupported proxy type: "{scheme}"')
def _check_extensions(self, extensions):
    """Type-check the built-in request extensions. Subclasses should extend this.

    This base implementation only asserts the types of `cookiejar` and `timeout`
    when they are present; it does not remove anything from `extensions`.
    """
    cookiejar = extensions.get('cookiejar')
    assert isinstance(cookiejar, (YoutubeDLCookieJar, NoneType))
    timeout = extensions.get('timeout')
    assert isinstance(timeout, (float, int, NoneType))
316 def _validate(self
, request
):
317 self
._check
_url
_scheme
(request
)
318 self
._check
_proxies
(request
.proxies
or self
.proxies
)
319 extensions
= request
.extensions
.copy()
320 self
._check
_extensions
(extensions
)
322 # TODO: add support for optional extensions
323 raise UnsupportedRequest(f
'Unsupported extensions: {", ".join(extensions.keys())}')
# NOTE(review): the embedded line numbering skips the line directly above this def
# (possibly a decorator lost in extraction) - confirm against the upstream file.
def validate(self, request: Request):
    """Run the handler's checks on `request` via `_validate()`.

    @raises TypeError: if `request` is not a Request instance.
    """
    if isinstance(request, Request):
        self._validate(request)
        return
    raise TypeError('Expected an instance of Request')
# NOTE(review): the embedded line numbering skips the line directly above this def
# (possibly a decorator lost in extraction) - confirm against the upstream file.
def send(self, request: Request) -> Response:
    """Send `request` through `_send()` and return its result.

    @raises TypeError: if `request` is not a Request instance.
    """
    if isinstance(request, Request):
        return self._send(request)
    raise TypeError('Expected an instance of Request')
def _send(self, request: Request):
    """Handle a request from start to finish. Redefine in subclasses.

    Called by send() once the argument has been checked to be a Request.
    Per the class documentation, implementations return a Response.
    """
347 return cls
.__name
__[:-2]
351 assert cls
.__name
__.endswith('RH'), 'RequestHandler class names must end with "RH"'
352 return cls
.__name
__[:-2]
357 def __exit__(self
, *args
):
363 Represents a request to be made.
364 Partially backwards-compatible with urllib.request.Request.
366 @param url: url to send. Will be sanitized.
367 @param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
368 @param headers: headers to send.
369 @param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
370 @param query: URL query parameters to update the url with.
371 @param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
372 @param extensions: Dictionary of Request extensions to add, as supported by handlers.
378 data
: RequestData
= None,
379 headers
: typing
.Mapping
= None,
380 proxies
: dict = None,
383 extensions
: dict = None
386 self
._headers
= HTTPHeaderDict()
390 url
= update_url_query(url
, query
)
395 self
.headers
= headers
396 self
.data
= data
# note: must be done after setting headers
397 self
.proxies
= proxies
or {}
398 self
.extensions
= extensions
or {}
406 if not isinstance(url
, str):
407 raise TypeError('url must be a string')
408 elif url
.startswith('//'):
410 self
._url
= normalize_url(url
)
414 return self
._method
or ('POST' if self
.data
is not None else 'GET')
417 def method(self
, method
):
420 elif isinstance(method
, str):
421 self
._method
= method
.upper()
423 raise TypeError('method must be a string')
430 def data(self
, data
: RequestData
):
431 # Try catch some common mistakes
432 if data
is not None and (
433 not isinstance(data
, (bytes, io
.IOBase
, Iterable
)) or isinstance(data
, (str, Mapping
))
435 raise TypeError('data must be bytes, iterable of bytes, or a file-like object')
437 if data
== self
._data
and self
._data
is None:
438 self
.headers
.pop('Content-Length', None)
440 # https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
441 if data
!= self
._data
:
442 if self
._data
is not None:
443 self
.headers
.pop('Content-Length', None)
446 if self
._data
is None:
447 self
.headers
.pop('Content-Type', None)
449 if 'Content-Type' not in self
.headers
and self
._data
is not None:
450 self
.headers
['Content-Type'] = 'application/x-www-form-urlencoded'
453 def headers(self
) -> HTTPHeaderDict
:
457 def headers(self
, new_headers
: Mapping
):
458 """Replaces headers of the request. If not a HTTPHeaderDict, it will be converted to one."""
459 if isinstance(new_headers
, HTTPHeaderDict
):
460 self
._headers
= new_headers
461 elif isinstance(new_headers
, Mapping
):
462 self
._headers
= HTTPHeaderDict(new_headers
)
464 raise TypeError('headers must be a mapping')
def update(self, url=None, data=None, headers=None, query=None, extensions=None):
    """Update the request in place.

    @param url: replacement url; the current url is kept when falsy.
    @param data: replacement payload; the current payload is re-assigned when None.
    @param headers: headers merged into the existing headers.
    @param query: query parameters applied to the url with update_url_query().
    @param extensions: extensions merged into the existing extensions.
    """
    # Always assign, even when unchanged, so the data setter runs as before.
    self.data = self.data if data is None else data
    self.headers.update(headers if headers else {})
    self.extensions.update(extensions if extensions else {})
    base_url = url if url else self.url
    self.url = update_url_query(base_url, query if query else {})
473 return self
.__class
__(
475 headers
=copy
.deepcopy(self
.headers
),
476 proxies
=copy
.deepcopy(self
.proxies
),
478 extensions
=copy
.copy(self
.extensions
),
# Convenience constructors: Request with the `method` keyword pre-set.
HEADRequest = functools.partial(Request, method='HEAD')
PUTRequest = functools.partial(Request, method='PUT')
487 class Response(io
.IOBase
):
489 Base class for HTTP response adapters.
491 By default, it provides a basic wrapper for a file-like response object.
493 Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.
495 @param fp: Original, file-like, response.
496 @param url: URL that this is a response of.
497 @param headers: response headers.
498 @param status: Response HTTP status code. Default is 200 OK.
499 @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
500 @param extensions: Dictionary of handler-specific response extensions.
507 headers
: Mapping
[str, str],
510 extensions
: dict = None
514 self
.headers
= Message()
515 for name
, value
in headers
.items():
516 self
.headers
.add_header(name
, value
)
520 self
.reason
= reason
or HTTPStatus(status
).phrase
523 self
.extensions
= extensions
or {}
526 return self
.fp
.readable()
528 def read(self
, amt
: int = None) -> bytes:
529 # Expected errors raised here should be of type RequestError or subclasses.
530 # Subclasses should redefine this method with more precise error handling.
532 return self
.fp
.read(amt
)
533 except Exception as e
:
534 raise TransportError(cause
=e
) from e
538 return super().close()
540 def get_header(self
, name
, default
=None):
541 """Get header for name.
542 If there are multiple matching headers, return all separated by comma."""
543 headers
= self
.headers
.get_all(name
)
546 if name
.title() == 'Set-Cookie':
547 # Special case, only get the first one
548 # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
550 return ', '.join(headers
)
552 # The following methods are for compatibility reasons and are deprecated
555 deprecation_warning('Response.code is deprecated, use Response.status', stacklevel
=2)
559 deprecation_warning('Response.getcode() is deprecated, use Response.status', stacklevel
=2)
563 deprecation_warning('Response.geturl() is deprecated, use Response.url', stacklevel
=2)
567 deprecation_warning('Response.info() is deprecated, use Response.headers', stacklevel
=2)
def getheader(self, name, default=None):
    """Deprecated alias of get_header(): emits a deprecation warning, then forwards the call."""
    deprecation_warning('Response.getheader() is deprecated, use Response.get_header', stacklevel=2)
    value = self.get_header(name, default)
    return value
if typing.TYPE_CHECKING:
    # Aliases used only in annotations; they are not defined at runtime.
    RequestData = bytes | Iterable[bytes] | typing.IO | None
    # A preference function: func(handler, request) -> int
    Preference = typing.Callable[[RequestHandler, Request], int]

# Preference functions added by register_preference().
_RH_PREFERENCES: set[Preference] = set()