]> jfr.im git - yt-dlp.git/blame - yt_dlp/networking/common.py
[ie/PicartoVod] Fix extractor (#7727)
[yt-dlp.git] / yt_dlp / networking / common.py
CommitLineData
227bf1a3 1from __future__ import annotations
2
3import abc
4import copy
5import enum
6import functools
7import io
8import typing
9import urllib.parse
10import urllib.request
11import urllib.response
12from collections.abc import Iterable, Mapping
13from email.message import Message
14from http import HTTPStatus
227bf1a3 15
16from ._helper import make_ssl_context, wrap_request_errors
17from .exceptions import (
18 NoSupportingHandlers,
19 RequestError,
20 TransportError,
21 UnsupportedRequest,
22)
86aea0d3 23from ..compat.types import NoneType
6148833f 24from ..cookies import YoutubeDLCookieJar
227bf1a3 25from ..utils import (
26 bug_reports_message,
27 classproperty,
3d2623a8 28 deprecation_warning,
227bf1a3 29 error_to_str,
227bf1a3 30 update_url_query,
31)
4bf91228 32from ..utils.networking import HTTPHeaderDict, normalize_url
227bf1a3 33
if typing.TYPE_CHECKING:
    # Alias for the payload types accepted as request data.
    # Only defined for static type checkers; it does not exist at runtime.
    RequestData = bytes | Iterable[bytes] | typing.IO | None
36
37
class RequestDirector:
    """Dispatches requests to registered request handlers.

    Given a Request, the director asks each registered RequestHandler
    (in reverse insertion order) whether it supports the request and
    forwards the request to the first one that does.

    @param logger: Logger instance.
    @param verbose: Print debug request information to stdout.
    """

    def __init__(self, logger, verbose=False):
        # Handlers keyed by their RH_KEY
        self.handlers: dict[str, RequestHandler] = {}
        self.logger = logger  # TODO(Grub4k): default logger
        self.verbose = verbose

    def close(self):
        """Close every registered handler."""
        for rh in self.handlers.values():
            rh.close()

    def add_handler(self, handler: RequestHandler):
        """Register a handler instance. An existing handler with the same RH_KEY is replaced."""
        assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
        self.handlers[handler.RH_KEY] = handler

    def _print_verbose(self, msg):
        """Write a director debug message to the logger's stdout when verbose is enabled."""
        if not self.verbose:
            return
        self.logger.stdout(f'director: {msg}')

    def send(self, request: Request) -> Response:
        """
        Pass a request on to a suitable RequestHandler and return its Response.

        Handlers that raise UnsupportedRequest during validation are skipped.
        A RequestError raised while sending is propagated as-is; any other
        exception is logged and the next handler is tried.

        Raises RequestError if no handlers are configured, and
        NoSupportingHandlers if no handler produced a response.
        """
        if not self.handlers:
            raise RequestError('No request handlers configured')

        assert isinstance(request, Request)

        unsupported = []
        unexpected = []
        # TODO (future): add a per-request preference system
        candidates = list(self.handlers.values())
        candidates.reverse()
        for rh in candidates:
            self._print_verbose(f'Checking if "{rh.RH_NAME}" supports this request.')
            try:
                rh.validate(request)
            except UnsupportedRequest as err:
                self._print_verbose(
                    f'"{rh.RH_NAME}" cannot handle this request (reason: {error_to_str(err)})')
                unsupported.append(err)
                continue

            self._print_verbose(f'Sending request via "{rh.RH_NAME}"')
            try:
                response = rh.send(request)
            except RequestError:
                # Expected failure type: let the caller deal with it
                raise
            except Exception as err:
                # Anything else is treated as a handler issue; report it and try the next handler
                self.logger.error(
                    f'[{rh.RH_NAME}] Unexpected error: {error_to_str(err)}{bug_reports_message()}',
                    is_error=False)
                unexpected.append(err)
            else:
                assert isinstance(response, Response)
                return response

        raise NoSupportingHandlers(unsupported, unexpected)
103
104
# Registry of RequestHandler classes keyed by their RH_KEY; populated by register_rh()
_REQUEST_HANDLERS: dict[str, type[RequestHandler]] = {}
106
107
def register_rh(handler):
    """Register a RequestHandler class in the module-level registry.

    The class is stored under its RH_KEY and returned unchanged, so this
    function can be applied as a class decorator.
    Registering a non-RequestHandler class or a duplicate RH_KEY trips an assertion.
    """
    assert issubclass(handler, RequestHandler), f'{handler} must be a subclass of RequestHandler'
    rh_key = handler.RH_KEY
    assert rh_key not in _REQUEST_HANDLERS, f'RequestHandler {rh_key} already registered'
    _REQUEST_HANDLERS[rh_key] = handler
    return handler
114
115
class Features(enum.Enum):
    """Optional capabilities a request handler can declare support for."""

    # Support for the "all" key of a proxies dict
    ALL_PROXY = enum.auto()
    # Support for the "no" key of a proxies dict
    NO_PROXY = enum.auto()
119
120
class RequestHandler(abc.ABC):

    """Request Handler class

    Request handlers are classes that, given a Request,
    process the request from start to finish and return a Response.

    Concrete subclasses need to redefine the _send(request) method,
    which handles the underlying request logic and returns a Response.

    RH_NAME class variable may contain a display name for the RequestHandler.
    By default, this is generated from the class name.

    The concrete request handler MUST have "RH" as the suffix in the class name.

    All exceptions raised by a RequestHandler should be an instance of RequestError.
    Any other exception raised will be treated as a handler issue.

    If a Request is not supported by the handler, an UnsupportedRequest
    should be raised with a reason.

    By default, some checks are done on the request in _validate() based on the following class variables:
    - `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
        Any Request with an url scheme not in this list will raise an UnsupportedRequest.

    - `_SUPPORTED_PROXY_SCHEMES`: a tuple of supported proxy url schemes. Any Request that contains
        a proxy url with an url scheme not in this list will raise an UnsupportedRequest.

    - `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.

    The above may be set to None to disable the checks.

    Parameters:
    @param logger: logger instance
    @param headers: HTTP Headers to include when sending requests.
    @param cookiejar: Cookiejar to use for requests.
    @param timeout: Socket timeout to use when sending requests.
    @param proxies: Proxies to use for sending requests.
    @param source_address: Client-side IP address to bind to for requests.
    @param verbose: Print debug request and traffic information to stdout.
    @param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
    @param client_cert: SSL client certificate configuration.
            dict with {client_certificate, client_certificate_key, client_certificate_password}
    @param verify: Verify SSL certificates
    @param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.

    Some configuration options may be available for individual Requests too. In this case,
    either the Request configuration option takes precedence or they are merged.

    Requests may have additional optional parameters defined as extensions.
    RequestHandler subclasses may choose to support custom extensions.

    If an extension is supported, subclasses should extend _check_extensions(extensions)
    to pop and validate the extension.
    - Extensions left in `extensions` are treated as unsupported and UnsupportedRequest will be raised.

    The following extensions are defined for RequestHandler:
    - `cookiejar`: Cookiejar to use for this request.
    - `timeout`: socket timeout to use for this request.
    To enable these, add extensions.pop('<extension>', None) to _check_extensions

    Apart from the url protocol, proxies dict may contain the following keys:
    - `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
    - `no`: comma separated list of hostnames (optionally with port) to not use a proxy for.
    Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.

    """

    _SUPPORTED_URL_SCHEMES = ()
    _SUPPORTED_PROXY_SCHEMES = ()
    _SUPPORTED_FEATURES = ()

    def __init__(
        self, *,
        logger,  # TODO(Grub4k): default logger
        headers: HTTPHeaderDict = None,
        cookiejar: YoutubeDLCookieJar = None,
        timeout: float | int | None = None,
        proxies: dict = None,
        source_address: str = None,
        verbose: bool = False,
        prefer_system_certs: bool = False,
        client_cert: dict[str, str | None] = None,
        verify: bool = True,
        legacy_ssl_support: bool = False,
        **_,
    ):

        self._logger = logger
        self.headers = headers or {}
        # An explicitly passed (even empty) cookiejar is kept; only None gets a fresh jar
        self.cookiejar = YoutubeDLCookieJar() if cookiejar is None else cookiejar
        # Any falsy timeout (None, 0) falls back to 20 seconds
        self.timeout = float(timeout or 20)
        self.proxies = proxies or {}
        self.source_address = source_address
        self.verbose = verbose
        self.prefer_system_certs = prefer_system_certs
        self._client_cert = client_cert or {}
        self.verify = verify
        self.legacy_ssl_support = legacy_ssl_support
        super().__init__()

    def _make_sslcontext(self):
        """Build an SSL context from this handler's certificate and verification settings."""
        return make_ssl_context(
            verify=self.verify,
            legacy_support=self.legacy_ssl_support,
            use_certifi=not self.prefer_system_certs,
            **self._client_cert,
        )

    def _merge_headers(self, request_headers):
        """Combine the handler headers with the given request headers into a new HTTPHeaderDict."""
        return HTTPHeaderDict(self.headers, request_headers)

    def _check_url_scheme(self, request: Request):
        """Return the lowercased url scheme of the request, raising UnsupportedRequest if unsupported."""
        scheme = urllib.parse.urlparse(request.url).scheme.lower()
        supported = self._SUPPORTED_URL_SCHEMES
        if supported is None or scheme in supported:
            return scheme  # for further processing
        raise UnsupportedRequest(f'Unsupported url scheme: "{scheme}"')

    def _check_proxies(self, proxies):
        """Raise UnsupportedRequest if the proxies mapping contains a proxy this handler cannot use."""
        features = self._SUPPORTED_FEATURES
        url_schemes = self._SUPPORTED_URL_SCHEMES
        proxy_schemes = self._SUPPORTED_PROXY_SCHEMES

        for key, proxy in proxies.items():
            if proxy is None:
                continue

            if key == 'no':
                if features is not None and Features.NO_PROXY not in features:
                    raise UnsupportedRequest('"no" proxy is not supported')
                continue

            if key == 'all' and features is not None and Features.ALL_PROXY not in features:
                raise UnsupportedRequest('"all" proxy is not supported')

            # Unlikely this handler will use this proxy, so ignore.
            # This is to allow a case where a proxy may be set for a protocol
            # for one handler in which such protocol (and proxy) is not supported by another handler.
            if url_schemes is not None and key not in (*url_schemes, 'all'):
                continue

            if proxy_schemes is None:
                # Skip proxy scheme checks
                continue

            try:
                if urllib.request._parse_proxy(proxy)[0] is None:
                    # Scheme-less proxies are not supported
                    raise UnsupportedRequest(f'Proxy "{proxy}" missing scheme')
            except ValueError as err:
                # parse_proxy may raise on some invalid proxy urls such as "/a/b/c"
                raise UnsupportedRequest(f'Invalid proxy url "{proxy}": {err}')

            proxy_scheme = urllib.parse.urlparse(proxy).scheme.lower()
            if proxy_scheme not in proxy_schemes:
                raise UnsupportedRequest(f'Unsupported proxy type: "{proxy_scheme}"')

    def _check_extensions(self, extensions):
        """Check extensions for unsupported extensions. Subclasses should extend this."""
        assert isinstance(extensions.get('cookiejar'), (YoutubeDLCookieJar, NoneType))
        assert isinstance(extensions.get('timeout'), (float, int, NoneType))

    def _validate(self, request):
        """Run the url scheme, proxy and extension checks against the request."""
        self._check_url_scheme(request)
        # Request-level proxies take the place of the handler's proxies when set
        self._check_proxies(request.proxies or self.proxies)
        # Work on a copy so the request's own extensions are left untouched
        leftover = request.extensions.copy()
        self._check_extensions(leftover)
        if leftover:
            # TODO: add support for optional extensions
            raise UnsupportedRequest(f'Unsupported extensions: {", ".join(leftover.keys())}')

    @wrap_request_errors
    def validate(self, request: Request):
        """Validate that this handler supports the given Request."""
        if not isinstance(request, Request):
            raise TypeError('Expected an instance of Request')
        self._validate(request)

    @wrap_request_errors
    def send(self, request: Request) -> Response:
        """Send the given Request via _send() and return the result."""
        if not isinstance(request, Request):
            raise TypeError('Expected an instance of Request')
        return self._send(request)

    @abc.abstractmethod
    def _send(self, request: Request):
        """Handle a request from start to finish. Redefine in subclasses."""
        pass

    def close(self):
        """Release any resources held by the handler. Does nothing by default."""
        pass

    @classproperty
    def RH_NAME(cls):
        """Display name: the class name without its last two characters."""
        return cls.__name__[:-2]

    @classproperty
    def RH_KEY(cls):
        """Registry key: the class name without its "RH" suffix."""
        assert cls.__name__.endswith('RH'), 'RequestHandler class names must end with "RH"'
        return cls.__name__[:-2]

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Leaving the context closes the handler
        self.close()
324
325
class Request:
    """
    Represents a request to be made.
    Partially backwards-compatible with urllib.request.Request.

    @param url: url to send. Will be sanitized.
    @param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
    @param headers: headers to send.
    @param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
    @param query: URL query parameters to update the url with.
    @param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
    @param extensions: Dictionary of Request extensions to add, as supported by handlers.
    """

    def __init__(
            self,
            url: str,
            data: RequestData = None,
            headers: typing.Mapping = None,
            proxies: dict = None,
            query: dict = None,
            method: str = None,
            extensions: dict = None
    ):

        # Backing fields must exist before the property setters below run
        self._headers = HTTPHeaderDict()
        self._data = None

        if query:
            url = update_url_query(url, query)

        self.url = url
        self.method = method
        if headers:
            self.headers = headers
        self.data = data  # note: must be done after setting headers
        self.proxies = proxies or {}
        self.extensions = extensions or {}

    @property
    def url(self):
        return self._url

    @url.setter
    def url(self, url):
        if not isinstance(url, str):
            raise TypeError('url must be a string')
        if url.startswith('//'):
            # Scheme-relative urls are given the http scheme
            url = 'http:' + url
        self._url = normalize_url(url)

    @property
    def method(self):
        # An explicitly set method wins; otherwise it is derived from the presence of a payload
        if self._method:
            return self._method
        return 'GET' if self.data is None else 'POST'

    @method.setter
    def method(self, method):
        if method is not None and not isinstance(method, str):
            raise TypeError('method must be a string')
        self._method = None if method is None else method.upper()

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data: RequestData):
        # Try catch some common mistakes
        if data is not None:
            acceptable = (
                isinstance(data, (bytes, io.IOBase, Iterable))
                and not isinstance(data, (str, Mapping)))
            if not acceptable:
                raise TypeError('data must be bytes, iterable of bytes, or a file-like object')

        if data == self._data and self._data is None:
            self.headers.pop('Content-Length', None)

        # https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
        if data != self._data:
            if self._data is not None:
                self.headers.pop('Content-Length', None)
            self._data = data

        if self._data is None:
            # No payload: a Content-Type header makes no sense
            self.headers.pop('Content-Type', None)
        elif 'Content-Type' not in self.headers:
            # Payload without an explicit Content-Type gets the form-encoded default
            self.headers['Content-Type'] = 'application/x-www-form-urlencoded'

    @property
    def headers(self) -> HTTPHeaderDict:
        return self._headers

    @headers.setter
    def headers(self, new_headers: Mapping):
        """Replaces headers of the request. If not an HTTPHeaderDict, it will be converted to one."""
        if isinstance(new_headers, HTTPHeaderDict):
            # Used as-is (not copied)
            self._headers = new_headers
            return
        if not isinstance(new_headers, Mapping):
            raise TypeError('headers must be a mapping')
        self._headers = HTTPHeaderDict(new_headers)

    def update(self, url=None, data=None, headers=None, query=None):
        """Update the request in place: payload, then headers (merged), then url and query."""
        if data is None:
            # Re-assigning the current payload still runs the data setter
            data = self.data
        self.data = data
        self.headers.update(headers or {})
        self.url = update_url_query(url or self.url, query or {})

    def copy(self):
        """Return a new request of the same class with copied headers, proxies and extensions.

        Headers and proxies are deep-copied, extensions are shallow-copied
        and the payload object itself is shared with the copy.
        """
        return self.__class__(
            url=self.url,
            data=self._data,
            headers=copy.deepcopy(self.headers),
            proxies=copy.deepcopy(self.proxies),
            method=self._method,
            extensions=copy.copy(self.extensions),
        )
445
446
# Convenience constructors: Request with the HTTP method pre-bound.
# All other Request arguments are passed through as given.
HEADRequest = functools.partial(Request, method='HEAD')
PUTRequest = functools.partial(Request, method='PUT')
449
450
class Response(io.IOBase):
    """
    Base class for HTTP response adapters.

    By default, it provides a basic wrapper for a file-like response object.

    Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.

    @param fp: Original, file-like, response.
    @param url: URL that this is a response of.
    @param headers: response headers.
    @param status: Response HTTP status code. Default is 200 OK.
    @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
    """

    def __init__(
            self,
            fp: typing.IO,
            url: str,
            headers: Mapping[str, str],
            status: int = 200,
            reason: str = None):

        self.fp = fp
        # Headers are stored in an email.message.Message, one add_header() call per item
        self.headers = Message()
        for key, val in headers.items():
            self.headers.add_header(key, val)
        self.status = status
        self.url = url

        if not reason:
            # Fall back to the standard phrase; unknown status codes have none
            try:
                reason = HTTPStatus(status).phrase
            except ValueError:
                reason = None
        self.reason = reason

    def readable(self):
        return self.fp.readable()

    def read(self, amt: int = None) -> bytes:
        """Read from the wrapped response; any exception is re-raised as TransportError."""
        # Expected errors raised here should be of type RequestError or subclasses.
        # Subclasses should redefine this method with more precise error handling.
        try:
            return self.fp.read(amt)
        except Exception as err:
            raise TransportError(cause=err) from err

    def close(self):
        """Close the wrapped response object, then this object."""
        self.fp.close()
        return super().close()

    def get_header(self, name, default=None):
        """Get header for name.
        If there are multiple matching headers, return all separated by comma."""
        values = self.headers.get_all(name)
        if not values:
            return default
        # Special case for Set-Cookie, only get the first one
        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
        return values[0] if name.title() == 'Set-Cookie' else ', '.join(values)

    # The following methods are for compatibility reasons and are deprecated
    @property
    def code(self):
        deprecation_warning('Response.code is deprecated, use Response.status', stacklevel=2)
        return self.status

    def getcode(self):
        deprecation_warning('Response.getcode() is deprecated, use Response.status', stacklevel=2)
        return self.status

    def geturl(self):
        deprecation_warning('Response.geturl() is deprecated, use Response.url', stacklevel=2)
        return self.url

    def info(self):
        deprecation_warning('Response.info() is deprecated, use Response.headers', stacklevel=2)
        return self.headers

    def getheader(self, name, default=None):
        deprecation_warning('Response.getheader() is deprecated, use Response.get_header', stacklevel=2)
        return self.get_header(name, default)