]> jfr.im git - yt-dlp.git/blob - yt_dlp/networking/common.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / networking / common.py
1 from __future__ import annotations
2
3 import abc
4 import copy
5 import enum
6 import functools
7 import io
8 import typing
9 import urllib.parse
10 import urllib.request
11 import urllib.response
12 from collections.abc import Iterable, Mapping
13 from email.message import Message
14 from http import HTTPStatus
15
16 from ._helper import make_ssl_context, wrap_request_errors
17 from .exceptions import (
18 NoSupportingHandlers,
19 RequestError,
20 TransportError,
21 UnsupportedRequest,
22 )
23 from ..compat.types import NoneType
24 from ..cookies import YoutubeDLCookieJar
25 from ..utils import (
26 bug_reports_message,
27 classproperty,
28 deprecation_warning,
29 error_to_str,
30 update_url_query,
31 )
32 from ..utils.networking import HTTPHeaderDict, normalize_url
33
# Fallback socket timeout used by RequestHandler.__init__ when the caller passes
# no (or a falsy) `timeout`. NOTE(review): presumably expressed in seconds - confirm.
DEFAULT_TIMEOUT = 20
35
36
def register_preference(*handlers: type[RequestHandler]):
    """Decorator factory that registers a handler preference function.

    The decorated function is wrapped and added to the module-level
    `_RH_PREFERENCES` set. When one or more RequestHandler classes are given,
    the wrapped preference is only consulted for instances of those classes;
    for any other handler the wrapper returns 0.

    @param handlers: RequestHandler subclasses the preference applies to.
                     If empty, the preference applies to every handler.
    """
    for handler_cls in handlers:
        assert issubclass(handler_cls, RequestHandler)

    def decorator(preference: Preference):
        @functools.wraps(preference)
        def scoped_preference(handler, *args, **kwargs):
            applies = not handlers or isinstance(handler, handlers)
            return preference(handler, *args, **kwargs) if applies else 0

        _RH_PREFERENCES.add(scoped_preference)
        return scoped_preference

    return decorator
49
50
class RequestDirector:
    """Route each Request to a RequestHandler that is able to serve it.

    Handlers are stored by their RH_KEY. For every request they are ranked by
    the sum of all preference functions found in the `preferences` set, each of
    the form func(handler, request) -> int, and are tried from the highest
    score down.

    @param logger: Logger instance.
    @param verbose: Print debug request information to stdout.
    """

    def __init__(self, logger, verbose=False):
        self.logger = logger  # TODO(Grub4k): default logger
        self.verbose = verbose
        self.handlers: dict[str, RequestHandler] = {}
        self.preferences: set[Preference] = set()

    def close(self):
        """Close every registered handler, then drop them all."""
        registered = self.handlers
        for rh in registered.values():
            rh.close()
        registered.clear()

    def add_handler(self, handler: RequestHandler):
        """Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""
        assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
        key = handler.RH_KEY
        self.handlers[key] = handler

    def _get_handlers(self, request: Request) -> list[RequestHandler]:
        """Return the registered handlers ordered from most to least preferred for `request`."""
        scores = {}
        for candidate in self.handlers.values():
            scores[candidate] = sum(score_fn(candidate, request) for score_fn in self.preferences)

        summary = ', '.join(f'{candidate.RH_NAME}={score}' for candidate, score in scores.items())
        self._print_verbose(f'Handler preferences for this request: {summary}')
        return sorted(self.handlers.values(), key=scores.get, reverse=True)

    def _print_verbose(self, msg):
        """Write a director debug message to the logger's stdout when verbose is enabled."""
        if not self.verbose:
            return
        self.logger.stdout(f'director: {msg}')

    def send(self, request: Request) -> Response:
        """
        Passes a request onto a suitable RequestHandler

        Handlers are tried in order of preference. A handler that rejects the request
        during validation, or that fails with a non-RequestError exception, is skipped.
        A RequestError raised while sending is propagated to the caller unchanged.

        @raises RequestError: if no handlers are configured.
        @raises NoSupportingHandlers: if every handler was skipped.
        """
        if not self.handlers:
            raise RequestError('No request handlers configured')

        assert isinstance(request, Request)

        unsupported_errors = []
        unexpected_errors = []
        for rh in self._get_handlers(request):
            self._print_verbose(f'Checking if "{rh.RH_NAME}" supports this request.')
            try:
                rh.validate(request)
            except UnsupportedRequest as err:
                self._print_verbose(
                    f'"{rh.RH_NAME}" cannot handle this request (reason: {error_to_str(err)})')
                unsupported_errors.append(err)
                continue

            self._print_verbose(f'Sending request via "{rh.RH_NAME}"')
            try:
                response = rh.send(request)
            except RequestError:
                # Errors the handler reported deliberately are final
                raise
            except Exception as err:
                # Anything else is treated as a handler issue: report it and try the next handler
                self.logger.error(
                    f'[{rh.RH_NAME}] Unexpected error: {error_to_str(err)}{bug_reports_message()}',
                    is_error=False)
                unexpected_errors.append(err)
            else:
                assert isinstance(response, Response)
                return response

        raise NoSupportingHandlers(unsupported_errors, unexpected_errors)
131
132
# Registry of RequestHandler classes, keyed by their RH_KEY
_REQUEST_HANDLERS = {}


def register_rh(handler):
    """Register a RequestHandler class

    Usable as a class decorator: the class is stored in `_REQUEST_HANDLERS`
    under its RH_KEY and returned unchanged. Registering something that is not
    a RequestHandler subclass, or registering the same RH_KEY twice, trips an assertion.
    """
    assert issubclass(handler, RequestHandler), f'{handler} must be a subclass of RequestHandler'
    key = handler.RH_KEY
    assert key not in _REQUEST_HANDLERS, f'RequestHandler {key} already registered'
    _REQUEST_HANDLERS[key] = handler
    return handler
142
143
class Features(enum.Enum):
    """Optional proxy features that a RequestHandler may list in `_SUPPORTED_FEATURES`."""

    # The handler accepts the `all` key of the proxies dict
    ALL_PROXY = enum.auto()
    # The handler accepts the `no` key of the proxies dict
    NO_PROXY = enum.auto()
147
148
class RequestHandler(abc.ABC):
    """Request Handler class

    Request handlers are classes that, given a Request,
    process the request from start to finish and return a Response.

    Concrete subclasses need to redefine the _send(request) method,
    which handles the underlying request logic and returns a Response.

    RH_NAME class variable may contain a display name for the RequestHandler.
    By default, this is generated from the class name.

    The concrete request handler MUST have "RH" as the suffix in the class name.

    All exceptions raised by a RequestHandler should be an instance of RequestError.
    Any other exception raised will be treated as a handler issue.

    If a Request is not supported by the handler, an UnsupportedRequest
    should be raised with a reason.

    By default, some checks are done on the request in _validate() based on the following class variables:
    - `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
        Any Request with an url scheme not in this list will raise an UnsupportedRequest.

    - `_SUPPORTED_PROXY_SCHEMES`: a tuple of support proxy url schemes. Any Request that contains
        a proxy url with an url scheme not in this list will raise an UnsupportedRequest.

    - `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.

    The above may be set to None to disable the checks.

    Parameters:
    @param logger: logger instance
    @param headers: HTTP Headers to include when sending requests.
    @param cookiejar: Cookiejar to use for requests.
    @param timeout: Socket timeout to use when sending requests.
    @param proxies: Proxies to use for sending requests.
    @param source_address: Client-side IP address to bind to for requests.
    @param verbose: Print debug request and traffic information to stdout.
    @param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
    @param client_cert: SSL client certificate configuration.
            dict with {client_certificate, client_certificate_key, client_certificate_password}
    @param verify: Verify SSL certificates
    @param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.

    Some configuration options may be available for individual Requests too. In this case,
    either the Request configuration option takes precedence or they are merged.

    Requests may have additional optional parameters defined as extensions.
    RequestHandler subclasses may choose to support custom extensions.

    If an extension is supported, subclasses should extend _check_extensions(extensions)
    to pop and validate the extension.
    - Extensions left in `extensions` are treated as unsupported and UnsupportedRequest will be raised.

    The following extensions are defined for RequestHandler:
    - `cookiejar`: Cookiejar to use for this request.
    - `timeout`: socket timeout to use for this request.
    To enable these, add extensions.pop('<extension>', None) to _check_extensions

    Apart from the url protocol, proxies dict may contain the following keys:
    - `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
    - `no`: comma separated list of hostnames (optionally with port) to not use a proxy for.
    Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.

    """

    _SUPPORTED_URL_SCHEMES = ()
    _SUPPORTED_PROXY_SCHEMES = ()
    _SUPPORTED_FEATURES = ()

    def __init__(
        self, *,
        logger,  # TODO(Grub4k): default logger
        headers: HTTPHeaderDict | None = None,
        cookiejar: YoutubeDLCookieJar | None = None,
        timeout: float | int | None = None,
        proxies: dict | None = None,
        source_address: str | None = None,
        verbose: bool = False,
        prefer_system_certs: bool = False,
        client_cert: dict[str, str | None] | None = None,
        verify: bool = True,
        legacy_ssl_support: bool = False,
        **_,
    ):
        self._logger = logger
        self.headers = headers or {}
        # An explicit `is not None` test: an empty cookiejar passed by the caller must be kept
        self.cookiejar = cookiejar if cookiejar is not None else YoutubeDLCookieJar()
        self.timeout = float(timeout or DEFAULT_TIMEOUT)
        self.proxies = proxies or {}
        self.source_address = source_address
        self.verbose = verbose
        self.prefer_system_certs = prefer_system_certs
        self._client_cert = client_cert or {}
        self.verify = verify
        self.legacy_ssl_support = legacy_ssl_support
        super().__init__()

    def _make_sslcontext(self):
        """Build an SSL context from this handler's verify / legacy / certificate settings."""
        return make_ssl_context(
            verify=self.verify,
            legacy_support=self.legacy_ssl_support,
            use_certifi=not self.prefer_system_certs,
            **self._client_cert,
        )

    def _merge_headers(self, request_headers):
        """Combine the handler headers with the request headers into a new HTTPHeaderDict."""
        return HTTPHeaderDict(self.headers, request_headers)

    def _calculate_timeout(self, request):
        """Return the `timeout` extension of the request if set (and non-zero), else the handler timeout."""
        return float(request.extensions.get('timeout') or self.timeout)

    def _get_cookiejar(self, request):
        """Return the `cookiejar` extension of the request if one is set, else the handler cookiejar."""
        cookiejar = request.extensions.get('cookiejar')
        # Do not use `or` here: http.cookiejar.CookieJar defines __len__, so an empty
        # request-specific jar is falsy and would silently be swapped for the handler's jar.
        return self.cookiejar if cookiejar is None else cookiejar

    def _get_proxies(self, request):
        """Return a copy of the request proxies, falling back to the handler proxies if there are none."""
        return (request.proxies or self.proxies).copy()

    def _check_url_scheme(self, request: Request):
        """Raise UnsupportedRequest if the url scheme of the request is not supported.

        @returns: the lowercased url scheme.
        """
        scheme = urllib.parse.urlparse(request.url).scheme.lower()
        if self._SUPPORTED_URL_SCHEMES is not None and scheme not in self._SUPPORTED_URL_SCHEMES:
            raise UnsupportedRequest(f'Unsupported url scheme: "{scheme}"')
        return scheme  # for further processing

    def _check_proxies(self, proxies):
        """Raise UnsupportedRequest if a proxy relevant to this handler cannot be used by it.

        @param proxies: proxies dict; entries whose value is None are ignored.
        """
        for proxy_key, proxy_url in proxies.items():
            if proxy_url is None:
                continue
            if proxy_key == 'no':
                if self._SUPPORTED_FEATURES is not None and Features.NO_PROXY not in self._SUPPORTED_FEATURES:
                    raise UnsupportedRequest('"no" proxy is not supported')
                continue
            if (
                proxy_key == 'all'
                and self._SUPPORTED_FEATURES is not None
                and Features.ALL_PROXY not in self._SUPPORTED_FEATURES
            ):
                raise UnsupportedRequest('"all" proxy is not supported')

            # Unlikely this handler will use this proxy, so ignore.
            # This is to allow a case where a proxy may be set for a protocol
            # for one handler in which such protocol (and proxy) is not supported by another handler.
            if self._SUPPORTED_URL_SCHEMES is not None and proxy_key not in (*self._SUPPORTED_URL_SCHEMES, 'all'):
                continue

            if self._SUPPORTED_PROXY_SCHEMES is None:
                # Skip proxy scheme checks
                continue

            try:
                if urllib.request._parse_proxy(proxy_url)[0] is None:
                    # Scheme-less proxies are not supported
                    raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')
            except ValueError as e:
                # parse_proxy may raise on some invalid proxy urls such as "/a/b/c"
                raise UnsupportedRequest(f'Invalid proxy url "{proxy_url}": {e}') from e

            scheme = urllib.parse.urlparse(proxy_url).scheme.lower()
            if scheme not in self._SUPPORTED_PROXY_SCHEMES:
                raise UnsupportedRequest(f'Unsupported proxy type: "{scheme}"')

    def _check_extensions(self, extensions):
        """Check extensions for unsupported extensions. Subclasses should extend this."""
        assert isinstance(extensions.get('cookiejar'), (YoutubeDLCookieJar, NoneType))
        assert isinstance(extensions.get('timeout'), (float, int, NoneType))

    def _validate(self, request):
        """Run the url scheme, proxy and extension checks against the request."""
        self._check_url_scheme(request)
        self._check_proxies(request.proxies or self.proxies)
        # Work on a copy: _check_extensions() pops every extension it supports
        extensions = request.extensions.copy()
        self._check_extensions(extensions)
        if extensions:
            # TODO: add support for optional extensions
            raise UnsupportedRequest(f'Unsupported extensions: {", ".join(extensions.keys())}')

    @wrap_request_errors
    def validate(self, request: Request):
        """Check whether this handler supports the request; see _validate()."""
        if not isinstance(request, Request):
            raise TypeError('Expected an instance of Request')
        self._validate(request)

    @wrap_request_errors
    def send(self, request: Request) -> Response:
        """Send the request through _send() and return its result."""
        if not isinstance(request, Request):
            raise TypeError('Expected an instance of Request')
        return self._send(request)

    @abc.abstractmethod
    def _send(self, request: Request):
        """Handle a request from start to finish. Redefine in subclasses."""
        pass

    def close(self):  # noqa: B027
        """Release resources held by the handler. Does nothing by default."""
        pass

    @classproperty
    def RH_NAME(cls):
        """Display name of the handler: the class name without its last two characters."""
        return cls.__name__[:-2]

    @classproperty
    def RH_KEY(cls):
        """Key identifying the handler: the class name without the mandatory "RH" suffix."""
        assert cls.__name__.endswith('RH'), 'RequestHandler class names must end with "RH"'
        return cls.__name__[:-2]

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
361
362
class Request:
    """
    Description of a single request to be made.
    Partially backwards-compatible with urllib.request.Request.

    @param url: url to send. Will be sanitized.
    @param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
    @param headers: headers to send.
    @param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
    @param query: URL query parameters to update the url with.
    @param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
    @param extensions: Dictionary of Request extensions to add, as supported by handlers.
    """

    def __init__(
        self,
        url: str,
        data: RequestData = None,
        headers: typing.Mapping | None = None,
        proxies: dict | None = None,
        query: dict | None = None,
        method: str | None = None,
        extensions: dict | None = None,
    ):
        # The backing fields must exist before any of the property setters below run
        self._headers = HTTPHeaderDict()
        self._data = None

        self.url = update_url_query(url, query) if query else url
        self.method = method
        if headers:
            self.headers = headers
        self.data = data  # note: must be done after setting headers
        self.proxies = proxies or {}
        self.extensions = extensions or {}

    @property
    def url(self):
        return self._url

    @url.setter
    def url(self, url):
        """Store the normalized url; a scheme-relative url ("//host/...") is given the http scheme."""
        if not isinstance(url, str):
            raise TypeError('url must be a string')
        if url.startswith('//'):
            url = 'http:' + url
        self._url = normalize_url(url)

    @property
    def method(self):
        """The explicitly set method, or else POST when there is payload data and GET when there is none."""
        if self._method:
            return self._method
        return 'GET' if self.data is None else 'POST'

    @method.setter
    def method(self, method):
        """Store the method uppercased; None means the method is derived from the payload."""
        if method is not None and not isinstance(method, str):
            raise TypeError('method must be a string')
        self._method = None if method is None else method.upper()

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data: RequestData):
        """Set the payload and keep the Content-Length / Content-Type headers consistent with it."""
        # Try catch some common mistakes
        if data is not None:
            acceptable = (
                isinstance(data, (bytes, io.IOBase, Iterable))
                and not isinstance(data, (str, Mapping))
            )
            if not acceptable:
                raise TypeError('data must be bytes, iterable of bytes, or a file-like object')

        if data == self._data and self._data is None:
            self.headers.pop('Content-Length', None)

        # https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
        if data != self._data:
            if self._data is not None:
                self.headers.pop('Content-Length', None)
            self._data = data

        if self._data is None:
            # No payload, so no payload type either
            self.headers.pop('Content-Type', None)
        elif 'Content-Type' not in self.headers:
            self.headers['Content-Type'] = 'application/x-www-form-urlencoded'

    @property
    def headers(self) -> HTTPHeaderDict:
        return self._headers

    @headers.setter
    def headers(self, new_headers: Mapping):
        """Replaces headers of the request. If not a HTTPHeaderDict, it will be converted to one."""
        if isinstance(new_headers, HTTPHeaderDict):
            replacement = new_headers
        elif isinstance(new_headers, Mapping):
            replacement = HTTPHeaderDict(new_headers)
        else:
            raise TypeError('headers must be a mapping')
        self._headers = replacement

    def update(self, url=None, data=None, headers=None, query=None, extensions=None):
        """Update the request in place. Arguments left as None keep the current value;
        headers, extensions and query are merged into the existing ones."""
        if data is None:
            data = self.data
        # Always assign, so that the data setter runs even when the payload is unchanged
        self.data = data
        self.headers.update(headers or {})
        self.extensions.update(extensions or {})
        self.url = update_url_query(url or self.url, query or {})

    def copy(self):
        """Return a new request of the same class: headers and proxies are deep-copied,
        extensions are shallow-copied and the payload object is shared."""
        clone_kwargs = {
            'url': self.url,
            'headers': copy.deepcopy(self.headers),
            'proxies': copy.deepcopy(self.proxies),
            'data': self._data,
            'extensions': copy.copy(self.extensions),
            'method': self._method,
        }
        return self.__class__(**clone_kwargs)
483
484
# Convenience constructors: Request with the `method` keyword pre-set to HEAD / PUT
HEADRequest = functools.partial(Request, method='HEAD')
PUTRequest = functools.partial(Request, method='PUT')
487
488
489 class Response(io.IOBase):
490 """
491 Base class for HTTP response adapters.
492
493 By default, it provides a basic wrapper for a file-like response object.
494
495 Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.
496
497 @param fp: Original, file-like, response.
498 @param url: URL that this is a response of.
499 @param headers: response headers.
500 @param status: Response HTTP status code. Default is 200 OK.
501 @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
502 @param extensions: Dictionary of handler-specific response extensions.
503 """
504
505 def __init__(
506 self,
507 fp: io.IOBase,
508 url: str,
509 headers: Mapping[str, str],
510 status: int = 200,
511 reason: str | None = None,
512 extensions: dict | None = None,
513 ):
514
515 self.fp = fp
516 self.headers = Message()
517 for name, value in headers.items():
518 self.headers.add_header(name, value)
519 self.status = status
520 self.url = url
521 try:
522 self.reason = reason or HTTPStatus(status).phrase
523 except ValueError:
524 self.reason = None
525 self.extensions = extensions or {}
526
527 def readable(self):
528 return self.fp.readable()
529
530 def read(self, amt: int | None = None) -> bytes:
531 # Expected errors raised here should be of type RequestError or subclasses.
532 # Subclasses should redefine this method with more precise error handling.
533 try:
534 return self.fp.read(amt)
535 except Exception as e:
536 raise TransportError(cause=e) from e
537
538 def close(self):
539 self.fp.close()
540 return super().close()
541
542 def get_header(self, name, default=None):
543 """Get header for name.
544 If there are multiple matching headers, return all seperated by comma."""
545 headers = self.headers.get_all(name)
546 if not headers:
547 return default
548 if name.title() == 'Set-Cookie':
549 # Special case, only get the first one
550 # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
551 return headers[0]
552 return ', '.join(headers)
553
554 # The following methods are for compatability reasons and are deprecated
555 @property
556 def code(self):
557 deprecation_warning('Response.code is deprecated, use Response.status', stacklevel=2)
558 return self.status
559
560 def getcode(self):
561 deprecation_warning('Response.getcode() is deprecated, use Response.status', stacklevel=2)
562 return self.status
563
564 def geturl(self):
565 deprecation_warning('Response.geturl() is deprecated, use Response.url', stacklevel=2)
566 return self.url
567
568 def info(self):
569 deprecation_warning('Response.info() is deprecated, use Response.headers', stacklevel=2)
570 return self.headers
571
572 def getheader(self, name, default=None):
573 deprecation_warning('Response.getheader() is deprecated, use Response.get_header', stacklevel=2)
574 return self.get_header(name, default)
575
576
if typing.TYPE_CHECKING:
    # Aliases that exist for static type checkers only; they are not defined at runtime
    # and are therefore only usable inside annotations.
    RequestData = bytes | Iterable[bytes] | typing.IO | None
    Preference = typing.Callable[[RequestHandler, Request], int]

# Preference functions added through the register_preference() decorator
_RH_PREFERENCES: set[Preference] = set()