]> jfr.im git - yt-dlp.git/blame - yt_dlp/networking/common.py
[build] Add transitional `setup.py` and `pyinst.py` (#9296)
[yt-dlp.git] / yt_dlp / networking / common.py
CommitLineData
227bf1a3 1from __future__ import annotations
2
3import abc
4import copy
5import enum
6import functools
7import io
8import typing
9import urllib.parse
10import urllib.request
11import urllib.response
12from collections.abc import Iterable, Mapping
13from email.message import Message
14from http import HTTPStatus
227bf1a3 15
16from ._helper import make_ssl_context, wrap_request_errors
17from .exceptions import (
18 NoSupportingHandlers,
19 RequestError,
20 TransportError,
21 UnsupportedRequest,
22)
86aea0d3 23from ..compat.types import NoneType
6148833f 24from ..cookies import YoutubeDLCookieJar
227bf1a3 25from ..utils import (
26 bug_reports_message,
27 classproperty,
3d2623a8 28 deprecation_warning,
227bf1a3 29 error_to_str,
227bf1a3 30 update_url_query,
31)
4bf91228 32from ..utils.networking import HTTPHeaderDict, normalize_url
227bf1a3 33
db7b054a 34
def register_preference(*handlers: type[RequestHandler]):
    """Decorator factory that globally registers a handler preference function.

    The decorated function is wrapped so that it is only consulted for
    instances of the given RequestHandler classes; for any other handler the
    wrapper returns a neutral preference of 0. When no classes are given,
    the preference applies to every handler.

    The wrapper (not the undecorated function) is added to `_RH_PREFERENCES`
    and is what the decorator returns.

    @param handlers: RequestHandler subclasses the preference is limited to.
    """
    for handler_cls in handlers:
        assert issubclass(handler_cls, RequestHandler)

    def decorator(preference: Preference):
        @functools.wraps(preference)
        def scoped_preference(handler, *args, **kwargs):
            applies = not handlers or isinstance(handler, handlers)
            return preference(handler, *args, **kwargs) if applies else 0

        _RH_PREFERENCES.add(scoped_preference)
        return scoped_preference

    return decorator
227bf1a3 47
48
class RequestDirector:
    """Dispatches a Request to the first registered RequestHandler that can serve it.

    Handlers are tried in order of preference. Preference functions in the
    form of func(handler, request) -> int can be added to the `preferences`
    set; the results of all functions are summed per handler and handlers
    with a higher total are tried first.

    @param logger: Logger instance.
    @param verbose: Print debug request information to stdout.
    """

    def __init__(self, logger, verbose=False):
        self.logger = logger  # TODO(Grub4k): default logger
        self.verbose = verbose
        self.handlers: dict[str, RequestHandler] = {}
        self.preferences: set[Preference] = set()

    def close(self):
        """Close every registered handler, then drop all of them."""
        for rh in self.handlers.values():
            rh.close()
        self.handlers.clear()

    def add_handler(self, handler: RequestHandler):
        """Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""
        assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
        self.handlers[handler.RH_KEY] = handler

    def _get_handlers(self, request: Request) -> list[RequestHandler]:
        """Return the registered handlers, most preferred first, for the given request"""
        scores = {}
        for rh in self.handlers.values():
            scores[rh] = sum(pref(rh, request) for pref in self.preferences)

        summary = ', '.join(f'{rh.RH_NAME}={score}' for rh, score in scores.items())
        self._print_verbose(f'Handler preferences for this request: {summary}')
        return sorted(self.handlers.values(), key=scores.get, reverse=True)

    def _print_verbose(self, msg):
        """Write a debug message through the logger, only when verbose is enabled"""
        if not self.verbose:
            return
        self.logger.stdout(f'director: {msg}')

    def send(self, request: Request) -> Response:
        """
        Passes a request onto a suitable RequestHandler

        Handlers that reject the request with UnsupportedRequest, or that fail
        with an exception that is not a RequestError, are skipped and the next
        handler is tried. A RequestError raised while sending is propagated.

        @raises RequestError: if no handlers are configured.
        @raises NoSupportingHandlers: if every handler was skipped.
        """
        if not self.handlers:
            raise RequestError('No request handlers configured')

        assert isinstance(request, Request)

        unsupported_errors, unexpected_errors = [], []
        for handler in self._get_handlers(request):
            self._print_verbose(f'Checking if "{handler.RH_NAME}" supports this request.')
            try:
                handler.validate(request)
            except UnsupportedRequest as err:
                self._print_verbose(
                    f'"{handler.RH_NAME}" cannot handle this request (reason: {error_to_str(err)})')
                unsupported_errors.append(err)
                continue

            self._print_verbose(f'Sending request via "{handler.RH_NAME}"')
            try:
                response = handler.send(request)
            except RequestError:
                # Expected failure type: let the caller deal with it
                raise
            except Exception as err:
                # Anything else is treated as a handler issue: report it and try the next one
                self.logger.error(
                    f'[{handler.RH_NAME}] Unexpected error: {error_to_str(err)}{bug_reports_message()}',
                    is_error=False)
                unexpected_errors.append(err)
            else:
                assert isinstance(response, Response)
                return response

        raise NoSupportingHandlers(unsupported_errors, unexpected_errors)
129
130
# Registry of RequestHandler classes, keyed by their RH_KEY
_REQUEST_HANDLERS = {}


def register_rh(handler):
    """Register a RequestHandler class

    Usable as a class decorator: the class is stored in `_REQUEST_HANDLERS`
    under its RH_KEY and returned unchanged. Registering two classes with
    the same RH_KEY fails an assertion.
    """
    assert issubclass(handler, RequestHandler), f'{handler} must be a subclass of RequestHandler'
    key = handler.RH_KEY
    assert key not in _REQUEST_HANDLERS, f'RequestHandler {key} already registered'
    _REQUEST_HANDLERS[key] = handler
    return handler
140
141
class Features(enum.Enum):
    """Optional proxy features a RequestHandler can list in `_SUPPORTED_FEATURES`."""

    # The handler accepts the `all` key of the proxies dict
    ALL_PROXY = enum.auto()
    # The handler accepts the `no` key of the proxies dict
    NO_PROXY = enum.auto()
145
146
class RequestHandler(abc.ABC):

    """Request Handler class

    A request handler is a class that, given a Request,
    processes the request from start to finish and returns a Response.

    Concrete subclasses need to redefine the _send(request) method,
    which handles the underlying request logic and returns a Response.

    RH_NAME class variable may contain a display name for the RequestHandler.
    By default, this is generated from the class name.

    The concrete request handler MUST have "RH" as the suffix in the class name.

    All exceptions raised by a RequestHandler should be an instance of RequestError.
    Any other exception raised will be treated as a handler issue.

    If a Request is not supported by the handler, an UnsupportedRequest
    should be raised with a reason.

    By default, some checks are done on the request in _validate() based on the following class variables:
    - `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
        Any Request with a url scheme not in this list will raise an UnsupportedRequest.

    - `_SUPPORTED_PROXY_SCHEMES`: a tuple of supported proxy url schemes. Any Request that contains
        a proxy url with a url scheme not in this list will raise an UnsupportedRequest.

    - `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.

    The above may be set to None to disable the checks.

    Parameters:
    @param logger: logger instance
    @param headers: HTTP Headers to include when sending requests.
    @param cookiejar: Cookiejar to use for requests.
    @param timeout: Socket timeout to use when sending requests.
    @param proxies: Proxies to use for sending requests.
    @param source_address: Client-side IP address to bind to for requests.
    @param verbose: Print debug request and traffic information to stdout.
    @param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
    @param client_cert: SSL client certificate configuration.
            dict with {client_certificate, client_certificate_key, client_certificate_password}
    @param verify: Verify SSL certificates
    @param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.

    Some configuration options may be available for individual Requests too. In this case,
    either the Request configuration option takes precedence or they are merged.

    Requests may have additional optional parameters defined as extensions.
    RequestHandler subclasses may choose to support custom extensions.

    If an extension is supported, subclasses should extend _check_extensions(extensions)
    to pop and validate the extension.
    - Extensions left in `extensions` are treated as unsupported and UnsupportedRequest will be raised.

    The following extensions are defined for RequestHandler:
    - `cookiejar`: Cookiejar to use for this request.
    - `timeout`: socket timeout to use for this request.
    To enable these, add extensions.pop('<extension>', None) to _check_extensions

    Apart from the url protocol, proxies dict may contain the following keys:
    - `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
    - `no`: comma separated list of hostnames (optionally with port) to not use a proxy for.
    Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.

    """

    _SUPPORTED_URL_SCHEMES = ()
    _SUPPORTED_PROXY_SCHEMES = ()
    _SUPPORTED_FEATURES = ()

    def __init__(
        self, *,
        logger,  # TODO(Grub4k): default logger
        headers: HTTPHeaderDict = None,
        cookiejar: YoutubeDLCookieJar = None,
        timeout: float | int | None = None,
        proxies: dict = None,
        source_address: str = None,
        verbose: bool = False,
        prefer_system_certs: bool = False,
        client_cert: dict[str, str | None] = None,
        verify: bool = True,
        legacy_ssl_support: bool = False,
        **_,
    ):
        # An explicitly passed (even empty) cookiejar is kept; only None gets a fresh jar
        if cookiejar is None:
            cookiejar = YoutubeDLCookieJar()

        self._logger = logger
        self.verbose = verbose

        self.headers = headers or {}
        self.cookiejar = cookiejar
        self.proxies = proxies or {}
        self.source_address = source_address
        # Any falsy timeout (None, 0) falls back to 20 seconds
        self.timeout = float(timeout or 20)

        self.verify = verify
        self.prefer_system_certs = prefer_system_certs
        self.legacy_ssl_support = legacy_ssl_support
        self._client_cert = client_cert or {}
        super().__init__()

    def _make_sslcontext(self):
        """Build an SSL context from this handler's certificate related settings"""
        ssl_options = {
            'verify': self.verify,
            'legacy_support': self.legacy_ssl_support,
            'use_certifi': not self.prefer_system_certs,
        }
        return make_ssl_context(**ssl_options, **self._client_cert)

    def _merge_headers(self, request_headers):
        """Combine the handler headers with the given request headers into a new HTTPHeaderDict"""
        merged = HTTPHeaderDict(self.headers, request_headers)
        return merged

    def _check_url_scheme(self, request: Request):
        """Return the lowercased url scheme of the request, raising UnsupportedRequest if not supported"""
        scheme = urllib.parse.urlparse(request.url).scheme.lower()
        supported = self._SUPPORTED_URL_SCHEMES
        if supported is None or scheme in supported:
            return scheme  # for further processing
        raise UnsupportedRequest(f'Unsupported url scheme: "{scheme}"')

    def _check_proxies(self, proxies):
        """Raise UnsupportedRequest if the proxies dict holds a proxy this handler cannot use"""
        features = self._SUPPORTED_FEATURES
        url_schemes = self._SUPPORTED_URL_SCHEMES
        proxy_schemes = self._SUPPORTED_PROXY_SCHEMES

        def lacks(feature):
            # A value of None for _SUPPORTED_FEATURES disables the feature checks
            return features is not None and feature not in features

        for proxy_key, proxy_url in proxies.items():
            if proxy_url is None:
                continue

            if proxy_key == 'no':
                if lacks(Features.NO_PROXY):
                    raise UnsupportedRequest('"no" proxy is not supported')
                continue

            if proxy_key == 'all' and lacks(Features.ALL_PROXY):
                raise UnsupportedRequest('"all" proxy is not supported')

            # Unlikely this handler will use this proxy, so ignore.
            # This is to allow a case where a proxy may be set for a protocol
            # for one handler in which such protocol (and proxy) is not supported by another handler.
            if url_schemes is not None and proxy_key not in (*url_schemes, 'all'):
                continue

            if proxy_schemes is None:
                # Skip proxy scheme checks
                continue

            try:
                if urllib.request._parse_proxy(proxy_url)[0] is None:
                    # Scheme-less proxies are not supported
                    raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')
            except ValueError as e:
                # parse_proxy may raise on some invalid proxy urls such as "/a/b/c"
                raise UnsupportedRequest(f'Invalid proxy url "{proxy_url}": {e}')

            proxy_scheme = urllib.parse.urlparse(proxy_url).scheme.lower()
            if proxy_scheme not in proxy_schemes:
                raise UnsupportedRequest(f'Unsupported proxy type: "{proxy_scheme}"')

    def _check_extensions(self, extensions):
        """Check extensions for unsupported extensions. Subclasses should extend this."""
        cookiejar = extensions.get('cookiejar')
        assert isinstance(cookiejar, (YoutubeDLCookieJar, NoneType))
        timeout = extensions.get('timeout')
        assert isinstance(timeout, (float, int, NoneType))

    def _validate(self, request):
        """Run the url scheme, proxy and extension checks against the request"""
        self._check_url_scheme(request)
        self._check_proxies(request.proxies or self.proxies)

        # Work on a copy: _check_extensions is expected to pop what it supports
        leftover = request.extensions.copy()
        self._check_extensions(leftover)
        if not leftover:
            return
        # TODO: add support for optional extensions
        names = ', '.join(leftover.keys())
        raise UnsupportedRequest(f'Unsupported extensions: {names}')

    @wrap_request_errors
    def validate(self, request: Request):
        """Check whether this handler supports the request; see _validate()"""
        if isinstance(request, Request):
            self._validate(request)
            return
        raise TypeError('Expected an instance of Request')

    @wrap_request_errors
    def send(self, request: Request) -> Response:
        """Send the request via _send() and return whatever it returns"""
        if isinstance(request, Request):
            return self._send(request)
        raise TypeError('Expected an instance of Request')

    @abc.abstractmethod
    def _send(self, request: Request):
        """Handle a request from start to finish. Redefine in subclasses."""
        pass

    def close(self):
        """Release resources held by the handler. Does nothing by default."""
        pass

    @classproperty
    def RH_NAME(cls):
        # Display name: the class name with its last two characters removed
        return cls.__name__[:-2]

    @classproperty
    def RH_KEY(cls):
        # Registry key: the class name without its mandatory "RH" suffix
        class_name = cls.__name__
        assert class_name.endswith('RH'), 'RequestHandler class names must end with "RH"'
        return class_name[:-2]

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
350
351
class Request:
    """
    Represents a request to be made.
    Partially backwards-compatible with urllib.request.Request.

    @param url: url to send. Will be sanitized.
    @param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
    @param headers: headers to send.
    @param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
    @param query: URL query parameters to update the url with.
    @param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
    @param extensions: Dictionary of Request extensions to add, as supported by handlers.
    """

    def __init__(
            self,
            url: str,
            data: RequestData = None,
            headers: typing.Mapping = None,
            proxies: dict = None,
            query: dict = None,
            method: str = None,
            extensions: dict = None
    ):

        # Backing fields must exist before any property setter below runs
        self._headers = HTTPHeaderDict()
        self._data = None

        self.url = update_url_query(url, query) if query else url
        self.method = method
        if headers:
            self.headers = headers
        self.data = data  # note: must be done after setting headers
        self.proxies = proxies or {}
        self.extensions = extensions or {}

    @property
    def url(self):
        return self._url

    @url.setter
    def url(self, url):
        if not isinstance(url, str):
            raise TypeError('url must be a string')
        if url.startswith('//'):
            # Scheme-relative urls default to plain http
            url = 'http:' + url
        self._url = normalize_url(url)

    @property
    def method(self):
        # An explicitly set method wins; otherwise it is derived from whether a payload is present
        if self._method:
            return self._method
        return 'GET' if self.data is None else 'POST'

    @method.setter
    def method(self, method):
        if method is not None and not isinstance(method, str):
            raise TypeError('method must be a string')
        self._method = None if method is None else method.upper()

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data: RequestData):
        # Try catch some common mistakes
        if data is not None:
            acceptable = isinstance(data, (bytes, io.IOBase, Iterable))
            disallowed = isinstance(data, (str, Mapping))
            if disallowed or not acceptable:
                raise TypeError('data must be bytes, iterable of bytes, or a file-like object')

        previous = self._data
        if data == previous and previous is None:
            self.headers.pop('Content-Length', None)

        # https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
        if data != previous:
            if previous is not None:
                self.headers.pop('Content-Length', None)
            self._data = data

        if self._data is None:
            self.headers.pop('Content-Type', None)
        elif 'Content-Type' not in self.headers:
            self.headers['Content-Type'] = 'application/x-www-form-urlencoded'

    @property
    def headers(self) -> HTTPHeaderDict:
        return self._headers

    @headers.setter
    def headers(self, new_headers: Mapping):
        """Replaces headers of the request. If not an HTTPHeaderDict, it will be converted to one."""
        if isinstance(new_headers, HTTPHeaderDict):
            self._headers = new_headers
            return
        if not isinstance(new_headers, Mapping):
            raise TypeError('headers must be a mapping')
        self._headers = HTTPHeaderDict(new_headers)

    def update(self, url=None, data=None, headers=None, query=None):
        """Update the request in place with a new url, data, extra headers and/or query parameters"""
        # `data` is always re-assigned, even when unchanged, so the setter's header handling runs
        self.data = self.data if data is None else data
        self.headers.update(headers or {})
        self.url = update_url_query(url or self.url, query or {})

    def copy(self):
        """Return a new request of the same class; headers and proxies are deep-copied, data is shared"""
        cls = self.__class__
        return cls(
            url=self.url,
            data=self._data,
            headers=copy.deepcopy(self.headers),
            proxies=copy.deepcopy(self.proxies),
            method=self._method,
            extensions=copy.copy(self.extensions),
        )
471
472
# Convenience constructors: Request with the `method` argument pre-filled
HEADRequest = functools.partial(Request, method='HEAD')
PUTRequest = functools.partial(Request, method='PUT')
475
476
class Response(io.IOBase):
    """
    Base class for HTTP response adapters.

    By default, it provides a basic wrapper for a file-like response object.

    Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.

    @param fp: Original, file-like, response.
    @param url: URL that this is a response of.
    @param headers: response headers.
    @param status: Response HTTP status code. Default is 200 OK.
    @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
    """

    def __init__(
            self,
            fp: typing.IO,
            url: str,
            headers: Mapping[str, str],
            status: int = 200,
            reason: str = None):

        self.fp = fp
        self.url = url
        self.status = status

        message = Message()
        for name, value in headers.items():
            message.add_header(name, value)
        self.headers = message

        try:
            self.reason = reason or HTTPStatus(status).phrase
        except ValueError:
            # Status code not known to HTTPStatus and no reason was supplied
            self.reason = None

    def readable(self):
        return self.fp.readable()

    def read(self, amt: int = None) -> bytes:
        # Expected errors raised here should be of type RequestError or subclasses.
        # Subclasses should redefine this method with more precise error handling.
        try:
            chunk = self.fp.read(amt)
        except Exception as err:
            raise TransportError(cause=err) from err
        return chunk

    def close(self):
        # Close the wrapped response first, then mark this object as closed
        self.fp.close()
        return super().close()

    def get_header(self, name, default=None):
        """Get header for name.
        If there are multiple matching headers, return all separated by comma."""
        values = self.headers.get_all(name)
        if not values:
            return default
        # Special case, only get the first one
        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
        first_only = name.title() == 'Set-Cookie'
        return values[0] if first_only else ', '.join(values)

    # The following methods are for compatibility reasons and are deprecated
    @property
    def code(self):
        deprecation_warning('Response.code is deprecated, use Response.status', stacklevel=2)
        return self.status

    def getcode(self):
        deprecation_warning('Response.getcode() is deprecated, use Response.status', stacklevel=2)
        return self.status

    def geturl(self):
        deprecation_warning('Response.geturl() is deprecated, use Response.url', stacklevel=2)
        return self.url

    def info(self):
        deprecation_warning('Response.info() is deprecated, use Response.headers', stacklevel=2)
        return self.headers

    def getheader(self, name, default=None):
        deprecation_warning('Response.getheader() is deprecated, use Response.get_header', stacklevel=2)
        return self.get_header(name, default)
db7b054a 559
560
if typing.TYPE_CHECKING:
    # Aliases that are only used inside annotations
    RequestData = bytes | Iterable[bytes] | typing.IO | None
    Preference = typing.Callable[[RequestHandler, Request], int]

# Module-level set of preference functions; register_preference() adds to it
_RH_PREFERENCES: set[Preference] = set()