]>
jfr.im git - yt-dlp.git/blob - yt_dlp/networking/common.py
1 from __future__
import annotations
11 import urllib
.response
12 from collections
.abc
import Iterable
, Mapping
13 from email
.message
import Message
14 from http
import HTTPStatus
16 from ._helper
import make_ssl_context
, wrap_request_errors
17 from .exceptions
import (
23 from ..compat
.types
import NoneType
24 from ..cookies
import YoutubeDLCookieJar
32 from ..utils
.networking
import HTTPHeaderDict
, normalize_url
37 def register_preference(*handlers
: type[RequestHandler
]):
38 assert all(issubclass(handler
, RequestHandler
) for handler
in handlers
)
40 def outer(preference
: Preference
):
41 @functools.wraps(preference
)
42 def inner(handler
, *args
, **kwargs
):
43 if not handlers
or isinstance(handler
, handlers
):
44 return preference(handler
, *args
, **kwargs
)
46 _RH_PREFERENCES
.add(inner
)
51 class RequestDirector
:
52 """RequestDirector class
54 Helper class that, when given a request, forwards it to a RequestHandler that supports it.
56 Preference functions in the form of func(handler, request) -> int
57 can be registered into the `preferences` set. These are used to sort handlers
58 in order of preference.
60 @param logger: Logger instance.
61 @param verbose: Print debug request information to stdout.
64 def __init__(self
, logger
, verbose
=False):
65 self
.handlers
: dict[str, RequestHandler
] = {}
66 self
.preferences
: set[Preference
] = set()
67 self
.logger
= logger
# TODO(Grub4k): default logger
68 self
.verbose
= verbose
71 for handler
in self
.handlers
.values():
def add_handler(self, handler: RequestHandler):
    """Register a handler instance with this director.

    The handler is stored under its RH_KEY; a handler already stored
    under the same key is overwritten.

    @param handler: RequestHandler instance to register.
    """
    assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
    self.handlers[handler.RH_KEY] = handler
80 def _get_handlers(self
, request
: Request
) -> list[RequestHandler
]:
81 """Sorts handlers by preference, given a request"""
83 rh
: sum(pref(rh
, request
) for pref
in self
.preferences
)
84 for rh
in self
.handlers
.values()
86 self
._print
_verbose
('Handler preferences for this request: %s' % ', '.join(
87 f
'{rh.RH_NAME}={pref}' for rh
, pref
in preferences
.items()))
88 return sorted(self
.handlers
.values(), key
=preferences
.get
, reverse
=True)
90 def _print_verbose(self
, msg
):
92 self
.logger
.stdout(f
'director: {msg}')
94 def send(self
, request
: Request
) -> Response
:
96 Passes a request onto a suitable RequestHandler
99 raise RequestError('No request handlers configured')
101 assert isinstance(request
, Request
)
103 unexpected_errors
= []
104 unsupported_errors
= []
105 for handler
in self
._get
_handlers
(request
):
106 self
._print
_verbose
(f
'Checking if "{handler.RH_NAME}" supports this request.')
108 handler
.validate(request
)
109 except UnsupportedRequest
as e
:
111 f
'"{handler.RH_NAME}" cannot handle this request (reason: {error_to_str(e)})')
112 unsupported_errors
.append(e
)
115 self
._print
_verbose
(f
'Sending request via "{handler.RH_NAME}"')
117 response
= handler
.send(request
)
120 except Exception as e
:
122 f
'[{handler.RH_NAME}] Unexpected error: {error_to_str(e)}{bug_reports_message()}',
124 unexpected_errors
.append(e
)
127 assert isinstance(response
, Response
)
130 raise NoSupportingHandlers(unsupported_errors
, unexpected_errors
)
133 _REQUEST_HANDLERS
= {}
136 def register_rh(handler
):
137 """Register a RequestHandler class"""
138 assert issubclass(handler
, RequestHandler
), f
'{handler} must be a subclass of RequestHandler'
139 assert handler
.RH_KEY
not in _REQUEST_HANDLERS
, f
'RequestHandler {handler.RH_KEY} already registered'
140 _REQUEST_HANDLERS
[handler
.RH_KEY
] = handler
class Features(enum.Enum):
    """Optional capabilities a RequestHandler can list in `_SUPPORTED_FEATURES`."""

    # Handler accepts the "all" key in the proxies dict
    ALL_PROXY = enum.auto()
    # Handler accepts the "no" key in the proxies dict
    NO_PROXY = enum.auto()
149 class RequestHandler(abc
.ABC
):
151 """Request Handler class
153 Request handlers are classes that, given a Request,
154 process the request from start to finish and return a Response.
156 Concrete subclasses need to redefine the _send(request) method,
157 which handles the underlying request logic and returns a Response.
159 RH_NAME class variable may contain a display name for the RequestHandler.
160 By default, this is generated from the class name.
162 The concrete request handler MUST have "RH" as the suffix in the class name.
164 All exceptions raised by a RequestHandler should be an instance of RequestError.
165 Any other exception raised will be treated as a handler issue.
167 If a Request is not supported by the handler, an UnsupportedRequest
168 should be raised with a reason.
170 By default, some checks are done on the request in _validate() based on the following class variables:
171 - `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
172 Any Request with an url scheme not in this list will raise an UnsupportedRequest.
174 - `_SUPPORTED_PROXY_SCHEMES`: a tuple of supported proxy url schemes. Any Request that contains
175 a proxy url with an url scheme not in this list will raise an UnsupportedRequest.
177 - `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.
179 The above may be set to None to disable the checks.
182 @param logger: logger instance
183 @param headers: HTTP Headers to include when sending requests.
184 @param cookiejar: Cookiejar to use for requests.
185 @param timeout: Socket timeout to use when sending requests.
186 @param proxies: Proxies to use for sending requests.
187 @param source_address: Client-side IP address to bind to for requests.
188 @param verbose: Print debug request and traffic information to stdout.
189 @param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
190 @param client_cert: SSL client certificate configuration.
191 dict with {client_certificate, client_certificate_key, client_certificate_password}
192 @param verify: Verify SSL certificates
193 @param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.
195 Some configuration options may be available for individual Requests too. In this case,
196 either the Request configuration option takes precedence or they are merged.
198 Requests may have additional optional parameters defined as extensions.
199 RequestHandler subclasses may choose to support custom extensions.
201 If an extension is supported, subclasses should extend _check_extensions(extensions)
202 to pop and validate the extension.
203 - Extensions left in `extensions` are treated as unsupported and UnsupportedRequest will be raised.
205 The following extensions are defined for RequestHandler:
206 - `cookiejar`: Cookiejar to use for this request.
207 - `timeout`: socket timeout to use for this request.
208 To enable these, add extensions.pop('<extension>', None) to _check_extensions
210 Apart from the url protocol, proxies dict may contain the following keys:
211 - `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
212 - `no`: comma-separated list of hostnames (optionally with port) to not use a proxy for.
213 Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.
217 _SUPPORTED_URL_SCHEMES
= ()
218 _SUPPORTED_PROXY_SCHEMES
= ()
219 _SUPPORTED_FEATURES
= ()
223 logger
, # TODO(Grub4k): default logger
224 headers
: HTTPHeaderDict
= None,
225 cookiejar
: YoutubeDLCookieJar
= None,
226 timeout
: float |
int |
None = None,
227 proxies
: dict = None,
228 source_address
: str = None,
229 verbose
: bool = False,
230 prefer_system_certs
: bool = False,
231 client_cert
: dict[str, str |
None] = None,
233 legacy_ssl_support
: bool = False,
237 self
._logger
= logger
238 self
.headers
= headers
or {}
239 self
.cookiejar
= cookiejar
if cookiejar
is not None else YoutubeDLCookieJar()
240 self
.timeout
= float(timeout
or DEFAULT_TIMEOUT
)
241 self
.proxies
= proxies
or {}
242 self
.source_address
= source_address
243 self
.verbose
= verbose
244 self
.prefer_system_certs
= prefer_system_certs
245 self
._client
_cert
= client_cert
or {}
247 self
.legacy_ssl_support
= legacy_ssl_support
250 def _make_sslcontext(self
):
251 return make_ssl_context(
253 legacy_support
=self
.legacy_ssl_support
,
254 use_certifi
=not self
.prefer_system_certs
,
def _merge_headers(self, request_headers):
    """Build a new HTTPHeaderDict from the handler headers and `request_headers`.

    The handler-level headers are passed first and the request headers second.
    """
    return HTTPHeaderDict(self.headers, request_headers)
def _calculate_timeout(self, request):
    """Return the timeout to use for `request`, as a float.

    The `timeout` request extension is used when it is truthy; a missing,
    None or zero value falls back to the handler's own `timeout`.
    """
    return float(request.extensions.get('timeout') or self.timeout)
def _get_cookiejar(self, request):
    """Return the cookiejar to use for `request`.

    The `cookiejar` request extension takes precedence; the handler's own
    cookiejar is used only when the extension is absent or None.
    """
    cookiejar = request.extensions.get('cookiejar')
    # Compare against None rather than relying on truthiness: a cookiejar that
    # currently holds no cookies can be falsy (http.cookiejar.CookieJar defines
    # __len__), and an explicitly supplied empty jar must still be honoured.
    return self.cookiejar if cookiejar is None else cookiejar
def _get_proxies(self, request):
    """Return a copy of the proxies mapping to use for `request`.

    The request's proxies are used when non-empty, otherwise the handler's.
    A copy is returned so callers can modify it without touching either source.
    """
    return (request.proxies or self.proxies).copy()
def _check_url_scheme(self, request: Request):
    """Check the url scheme of `request` against `_SUPPORTED_URL_SCHEMES`.

    The check is skipped when `_SUPPORTED_URL_SCHEMES` is None.

    @returns: the lower-cased url scheme.
    @raises UnsupportedRequest: if the scheme is not in `_SUPPORTED_URL_SCHEMES`.
    """
    scheme = urllib.parse.urlparse(request.url).scheme.lower()
    if self._SUPPORTED_URL_SCHEMES is not None and scheme not in self._SUPPORTED_URL_SCHEMES:
        raise UnsupportedRequest(f'Unsupported url scheme: "{scheme}"')
    return scheme  # for further processing
276 def _check_proxies(self
, proxies
):
277 for proxy_key
, proxy_url
in proxies
.items():
278 if proxy_url
is None:
280 if proxy_key
== 'no':
281 if self
._SUPPORTED
_FEATURES
is not None and Features
.NO_PROXY
not in self
._SUPPORTED
_FEATURES
:
282 raise UnsupportedRequest('"no" proxy is not supported')
286 and self
._SUPPORTED
_FEATURES
is not None
287 and Features
.ALL_PROXY
not in self
._SUPPORTED
_FEATURES
289 raise UnsupportedRequest('"all" proxy is not supported')
291 # Unlikely this handler will use this proxy, so ignore.
292 # This is to allow a case where a proxy may be set for a protocol
293 # for one handler in which such protocol (and proxy) is not supported by another handler.
294 if self
._SUPPORTED
_URL
_SCHEMES
is not None and proxy_key
not in (*self
._SUPPORTED
_URL
_SCHEMES
, 'all'):
297 if self
._SUPPORTED
_PROXY
_SCHEMES
is None:
298 # Skip proxy scheme checks
302 if urllib
.request
._parse
_proxy
(proxy_url
)[0] is None:
303 # Scheme-less proxies are not supported
304 raise UnsupportedRequest(f
'Proxy "{proxy_url}" missing scheme')
305 except ValueError as e
:
306 # parse_proxy may raise on some invalid proxy urls such as "/a/b/c"
307 raise UnsupportedRequest(f
'Invalid proxy url "{proxy_url}": {e}')
309 scheme
= urllib
.parse
.urlparse(proxy_url
).scheme
.lower()
310 if scheme
not in self
._SUPPORTED
_PROXY
_SCHEMES
:
311 raise UnsupportedRequest(f
'Unsupported proxy type: "{scheme}"')
def _check_extensions(self, extensions):
    """Check extensions for unsupported extensions. Subclasses should extend this.

    This base implementation only type-checks the `cookiejar` and `timeout`
    entries, each of which may be absent or None; it does not remove them
    from `extensions`.
    """
    assert isinstance(extensions.get('cookiejar'), (YoutubeDLCookieJar, NoneType))
    assert isinstance(extensions.get('timeout'), (float, int, NoneType))
318 def _validate(self
, request
):
319 self
._check
_url
_scheme
(request
)
320 self
._check
_proxies
(request
.proxies
or self
.proxies
)
321 extensions
= request
.extensions
.copy()
322 self
._check
_extensions
(extensions
)
324 # TODO: add support for optional extensions
325 raise UnsupportedRequest(f
'Unsupported extensions: {", ".join(extensions.keys())}')
328 def validate(self
, request
: Request
):
329 if not isinstance(request
, Request
):
330 raise TypeError('Expected an instance of Request')
331 self
._validate
(request
)
334 def send(self
, request
: Request
) -> Response
:
335 if not isinstance(request
, Request
):
336 raise TypeError('Expected an instance of Request')
337 return self
._send
(request
)
340 def _send(self
, request
: Request
):
341 """Handle a request from start to finish. Redefine in subclasses."""
349 return cls
.__name
__[:-2]
353 assert cls
.__name
__.endswith('RH'), 'RequestHandler class names must end with "RH"'
354 return cls
.__name
__[:-2]
359 def __exit__(self
, *args
):
365 Represents a request to be made.
366 Partially backwards-compatible with urllib.request.Request.
368 @param url: url to send. Will be sanitized.
369 @param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
370 @param headers: headers to send.
371 @param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
372 @param query: URL query parameters to update the url with.
373 @param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
374 @param extensions: Dictionary of Request extensions to add, as supported by handlers.
380 data
: RequestData
= None,
381 headers
: typing
.Mapping
= None,
382 proxies
: dict = None,
385 extensions
: dict = None
388 self
._headers
= HTTPHeaderDict()
392 url
= update_url_query(url
, query
)
397 self
.headers
= headers
398 self
.data
= data
# note: must be done after setting headers
399 self
.proxies
= proxies
or {}
400 self
.extensions
= extensions
or {}
408 if not isinstance(url
, str):
409 raise TypeError('url must be a string')
410 elif url
.startswith('//'):
412 self
._url
= normalize_url(url
)
416 return self
._method
or ('POST' if self
.data
is not None else 'GET')
419 def method(self
, method
):
422 elif isinstance(method
, str):
423 self
._method
= method
.upper()
425 raise TypeError('method must be a string')
432 def data(self
, data
: RequestData
):
433 # Try catch some common mistakes
434 if data
is not None and (
435 not isinstance(data
, (bytes, io
.IOBase
, Iterable
)) or isinstance(data
, (str, Mapping
))
437 raise TypeError('data must be bytes, iterable of bytes, or a file-like object')
439 if data
== self
._data
and self
._data
is None:
440 self
.headers
.pop('Content-Length', None)
442 # https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
443 if data
!= self
._data
:
444 if self
._data
is not None:
445 self
.headers
.pop('Content-Length', None)
448 if self
._data
is None:
449 self
.headers
.pop('Content-Type', None)
451 if 'Content-Type' not in self
.headers
and self
._data
is not None:
452 self
.headers
['Content-Type'] = 'application/x-www-form-urlencoded'
455 def headers(self
) -> HTTPHeaderDict
:
459 def headers(self
, new_headers
: Mapping
):
460 """Replaces headers of the request. If not a HTTPHeaderDict, it will be converted to one."""
461 if isinstance(new_headers
, HTTPHeaderDict
):
462 self
._headers
= new_headers
463 elif isinstance(new_headers
, Mapping
):
464 self
._headers
= HTTPHeaderDict(new_headers
)
466 raise TypeError('headers must be a mapping')
def update(self, url=None, data=None, headers=None, query=None, extensions=None):
    """Update this request in place.

    @param url: replacement url; the current url is kept when falsy.
    @param data: replacement payload; the current payload is kept when None.
    @param headers: headers merged into the existing headers.
    @param query: query parameters applied to the url via update_url_query.
    @param extensions: extensions merged into the existing extensions.
    """
    self.data = data if data is not None else self.data
    self.headers.update(headers or {})
    self.extensions.update(extensions or {})
    self.url = update_url_query(url or self.url, query or {})
475 return self
.__class
__(
477 headers
=copy
.deepcopy(self
.headers
),
478 proxies
=copy
.deepcopy(self
.proxies
),
480 extensions
=copy
.copy(self
.extensions
),
# Request constructors with the HTTP method preset
HEADRequest = functools.partial(Request, method='HEAD')
PUTRequest = functools.partial(Request, method='PUT')
489 class Response(io
.IOBase
):
491 Base class for HTTP response adapters.
493 By default, it provides a basic wrapper for a file-like response object.
495 Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.
497 @param fp: Original, file-like, response.
498 @param url: URL that this is a response of.
499 @param headers: response headers.
500 @param status: Response HTTP status code. Default is 200 OK.
501 @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
502 @param extensions: Dictionary of handler-specific response extensions.
509 headers
: Mapping
[str, str],
512 extensions
: dict = None
516 self
.headers
= Message()
517 for name
, value
in headers
.items():
518 self
.headers
.add_header(name
, value
)
522 self
.reason
= reason
or HTTPStatus(status
).phrase
525 self
.extensions
= extensions
or {}
528 return self
.fp
.readable()
530 def read(self
, amt
: int = None) -> bytes:
531 # Expected errors raised here should be of type RequestError or subclasses.
532 # Subclasses should redefine this method with more precise error handling.
534 return self
.fp
.read(amt
)
535 except Exception as e
:
536 raise TransportError(cause
=e
) from e
540 return super().close()
542 def get_header(self
, name
, default
=None):
543 """Get header for name.
544 If there are multiple matching headers, return all separated by comma."""
545 headers
= self
.headers
.get_all(name
)
548 if name
.title() == 'Set-Cookie':
549 # Special case, only get the first one
550 # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
552 return ', '.join(headers
)
554 # The following methods are for compatibility reasons and are deprecated
557 deprecation_warning('Response.code is deprecated, use Response.status', stacklevel
=2)
561 deprecation_warning('Response.getcode() is deprecated, use Response.status', stacklevel
=2)
565 deprecation_warning('Response.geturl() is deprecated, use Response.url', stacklevel
=2)
569 deprecation_warning('Response.info() is deprecated, use Response.headers', stacklevel
=2)
def getheader(self, name, default=None):
    """Deprecated alias of get_header(); emits a deprecation warning, then delegates to it."""
    deprecation_warning('Response.getheader() is deprecated, use Response.get_header', stacklevel=2)
    return self.get_header(name, default)
# Type aliases used only in annotations; defined for type checkers only
if typing.TYPE_CHECKING:
    RequestData = bytes | Iterable[bytes] | typing.IO | None
    Preference = typing.Callable[[RequestHandler, Request], int]

# Preference functions added by the register_preference decorator
_RH_PREFERENCES: set[Preference] = set()