]>
Commit | Line | Data |
---|---|---|
227bf1a3 | 1 | from __future__ import annotations |
2 | ||
3 | import abc | |
4 | import copy | |
5 | import enum | |
6 | import functools | |
7 | import io | |
8 | import typing | |
9 | import urllib.parse | |
10 | import urllib.request | |
11 | import urllib.response | |
12 | from collections.abc import Iterable, Mapping | |
13 | from email.message import Message | |
14 | from http import HTTPStatus | |
15 | from http.cookiejar import CookieJar | |
16 | ||
17 | from ._helper import make_ssl_context, wrap_request_errors | |
18 | from .exceptions import ( | |
19 | NoSupportingHandlers, | |
20 | RequestError, | |
21 | TransportError, | |
22 | UnsupportedRequest, | |
23 | ) | |
86aea0d3 | 24 | from ..compat.types import NoneType |
227bf1a3 | 25 | from ..utils import ( |
26 | bug_reports_message, | |
27 | classproperty, | |
3d2623a8 | 28 | deprecation_warning, |
227bf1a3 | 29 | error_to_str, |
30 | escape_url, | |
31 | update_url_query, | |
32 | ) | |
33 | from ..utils.networking import HTTPHeaderDict | |
34 | ||
if typing.TYPE_CHECKING:
    # Alias used only in annotations (e.g. the `data` parameter of Request);
    # it is defined for static type checkers and does not exist at runtime.
    RequestData = bytes | Iterable[bytes] | typing.IO | None
37 | ||
38 | ||
class RequestDirector:
    """RequestDirector class

    Keeps a registry of RequestHandler instances (keyed by RH_KEY) and, for each
    request, hands it to the first handler that accepts it.

    @param logger: Logger instance.
    @param verbose: Print debug request information to stdout.
    """

    def __init__(self, logger, verbose=False):
        self.handlers: dict[str, RequestHandler] = {}
        self.logger = logger  # TODO(Grub4k): default logger
        self.verbose = verbose

    def close(self):
        """Close every registered handler."""
        for rh in self.handlers.values():
            rh.close()

    def add_handler(self, handler: RequestHandler):
        """Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""
        assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
        self.handlers[handler.RH_KEY] = handler

    def _print_verbose(self, msg):
        """Write a director debug message to the logger's stdout when verbose is enabled."""
        if not self.verbose:
            return
        self.logger.stdout(f'director: {msg}')

    def send(self, request: Request) -> Response:
        """
        Passes a request onto a suitable RequestHandler.

        Handlers are tried in reverse insertion order. A handler that raises
        UnsupportedRequest from validate() is skipped; a RequestError from send()
        propagates; any other exception is logged and the next handler is tried.
        NoSupportingHandlers is raised when no handler produced a response.
        """
        if not self.handlers:
            raise RequestError('No request handlers configured')

        assert isinstance(request, Request)

        unsupported, unexpected = [], []
        # TODO (future): add a per-request preference system
        for rh in list(self.handlers.values())[::-1]:
            rh_name = rh.RH_NAME
            self._print_verbose(f'Checking if "{rh_name}" supports this request.')
            try:
                rh.validate(request)
            except UnsupportedRequest as err:
                self._print_verbose(
                    f'"{rh_name}" cannot handle this request (reason: {error_to_str(err)})')
                unsupported.append(err)
                continue

            self._print_verbose(f'Sending request via "{rh_name}"')
            try:
                result = rh.send(request)
            except RequestError:
                # Expected, handler-reported failure: do not fall through to other handlers
                raise
            except Exception as err:
                self.logger.error(
                    f'[{rh_name}] Unexpected error: {error_to_str(err)}{bug_reports_message()}',
                    is_error=False)
                unexpected.append(err)
                continue
            else:
                assert isinstance(result, Response)
                return result

        raise NoSupportingHandlers(unsupported, unexpected)
104 | ||
105 | ||
# Registry of RequestHandler classes, keyed by their RH_KEY
_REQUEST_HANDLERS = {}


def register_rh(handler):
    """Register a RequestHandler class

    Usable as a class decorator: the class is stored in _REQUEST_HANDLERS under
    its RH_KEY and returned unchanged. Registering a key twice is an assertion error.
    """
    assert issubclass(handler, RequestHandler), f'{handler} must be a subclass of RequestHandler'
    rh_key = handler.RH_KEY
    assert rh_key not in _REQUEST_HANDLERS, f'RequestHandler {rh_key} already registered'
    _REQUEST_HANDLERS[rh_key] = handler
    return handler
115 | ||
116 | ||
class Features(enum.Enum):
    """Optional capabilities a RequestHandler may list in `_SUPPORTED_FEATURES`."""

    # The handler accepts an `all` entry in the proxies dict
    ALL_PROXY = enum.auto()
    # The handler accepts a `no` entry in the proxies dict
    NO_PROXY = enum.auto()
120 | ||
121 | ||
class RequestHandler(abc.ABC):

    """Request Handler class

    Request handlers are classes that, given a Request,
    process the request from start to finish and return a Response.

    Concrete subclasses need to redefine the _send(request) method,
    which handles the underlying request logic and returns a Response.

    RH_NAME class variable may contain a display name for the RequestHandler.
    By default, this is generated from the class name.

    The concrete request handler MUST have "RH" as the suffix in the class name.

    All exceptions raised by a RequestHandler should be an instance of RequestError.
    Any other exception raised will be treated as a handler issue.

    If a Request is not supported by the handler, an UnsupportedRequest
    should be raised with a reason.

    By default, some checks are done on the request in _validate() based on the following class variables:
    - `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
        Any Request with an url scheme not in this list will raise an UnsupportedRequest.

    - `_SUPPORTED_PROXY_SCHEMES`: a tuple of supported proxy url schemes. Any Request that contains
        a proxy url with an url scheme not in this list will raise an UnsupportedRequest.

    - `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.

    The above may be set to None to disable the checks.

    Parameters:
    @param logger: logger instance
    @param headers: HTTP Headers to include when sending requests.
    @param cookiejar: Cookiejar to use for requests.
    @param timeout: Socket timeout to use when sending requests.
    @param proxies: Proxies to use for sending requests.
    @param source_address: Client-side IP address to bind to for requests.
    @param verbose: Print debug request and traffic information to stdout.
    @param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
    @param client_cert: SSL client certificate configuration.
            dict with {client_certificate, client_certificate_key, client_certificate_password}
    @param verify: Verify SSL certificates
    @param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.

    Some configuration options may be available for individual Requests too. In this case,
    either the Request configuration option takes precedence or they are merged.

    Requests may have additional optional parameters defined as extensions.
    RequestHandler subclasses may choose to support custom extensions.

    If an extension is supported, subclasses should extend _check_extensions(extensions)
    to pop and validate the extension.
    - Extensions left in `extensions` are treated as unsupported and UnsupportedRequest will be raised.

    The following extensions are defined for RequestHandler:
    - `cookiejar`: Cookiejar to use for this request.
    - `timeout`: socket timeout to use for this request.
    To enable these, add extensions.pop('<extension>', None) to _check_extensions

    Apart from the url protocol, proxies dict may contain the following keys:
    - `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
    - `no`: comma separated list of hostnames (optionally with port) to not use a proxy for.
    Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.

    """

    _SUPPORTED_URL_SCHEMES = ()
    _SUPPORTED_PROXY_SCHEMES = ()
    _SUPPORTED_FEATURES = ()

    def __init__(
        self, *,
        logger,  # TODO(Grub4k): default logger
        headers: HTTPHeaderDict = None,
        cookiejar: CookieJar = None,
        timeout: float | int | None = None,
        proxies: dict = None,
        source_address: str = None,
        verbose: bool = False,
        prefer_system_certs: bool = False,
        client_cert: dict[str, str | None] = None,
        verify: bool = True,
        legacy_ssl_support: bool = False,
        **_,
    ):

        self._logger = logger
        self.headers = headers or {}
        # An explicitly passed (even empty) cookiejar is kept; only None creates a new one
        self.cookiejar = cookiejar if cookiejar is not None else CookieJar()
        # Any falsy timeout (None or 0) falls back to 20
        self.timeout = float(timeout or 20)
        self.proxies = proxies or {}
        self.source_address = source_address
        self.verbose = verbose
        self.prefer_system_certs = prefer_system_certs
        self._client_cert = client_cert or {}
        self.verify = verify
        self.legacy_ssl_support = legacy_ssl_support
        super().__init__()

    def _make_sslcontext(self):
        """Build an SSL context via make_ssl_context() from this handler's SSL settings."""
        return make_ssl_context(
            verify=self.verify,
            legacy_support=self.legacy_ssl_support,
            use_certifi=not self.prefer_system_certs,
            **self._client_cert,
        )

    def _merge_headers(self, request_headers):
        """Return an HTTPHeaderDict built from the handler headers followed by the request headers.

        NOTE(review): presumably the request headers win on conflict - confirm against HTTPHeaderDict.
        """
        return HTTPHeaderDict(self.headers, request_headers)

    def _check_url_scheme(self, request: Request):
        """Return the lower-cased url scheme of the request.

        Raises UnsupportedRequest if `_SUPPORTED_URL_SCHEMES` is not None and does not contain the scheme.
        """
        scheme = urllib.parse.urlparse(request.url).scheme.lower()
        if self._SUPPORTED_URL_SCHEMES is not None and scheme not in self._SUPPORTED_URL_SCHEMES:
            raise UnsupportedRequest(f'Unsupported url scheme: "{scheme}"')
        return scheme  # for further processing

    def _check_proxies(self, proxies):
        """Validate a proxies dict against the features and proxy schemes this handler supports.

        Entries whose value is None are ignored.
        Raises UnsupportedRequest for an unsupported `no`/`all` key, a proxy url that is
        invalid or has no scheme, or a proxy scheme not in `_SUPPORTED_PROXY_SCHEMES`.
        """
        for proxy_key, proxy_url in proxies.items():
            if proxy_url is None:
                continue
            if proxy_key == 'no':
                if self._SUPPORTED_FEATURES is not None and Features.NO_PROXY not in self._SUPPORTED_FEATURES:
                    raise UnsupportedRequest('"no" proxy is not supported')
                # The `no` value is a host list, not a proxy url, so no scheme checks apply
                continue
            if (
                proxy_key == 'all'
                and self._SUPPORTED_FEATURES is not None
                and Features.ALL_PROXY not in self._SUPPORTED_FEATURES
            ):
                raise UnsupportedRequest('"all" proxy is not supported')

            # Unlikely this handler will use this proxy, so ignore.
            # This is to allow a case where a proxy may be set for a protocol
            # for one handler in which such protocol (and proxy) is not supported by another handler.
            if self._SUPPORTED_URL_SCHEMES is not None and proxy_key not in (*self._SUPPORTED_URL_SCHEMES, 'all'):
                continue

            if self._SUPPORTED_PROXY_SCHEMES is None:
                # Skip proxy scheme checks
                continue

            try:
                if urllib.request._parse_proxy(proxy_url)[0] is None:
                    # Scheme-less proxies are not supported
                    raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')
            except ValueError as e:
                # parse_proxy may raise on some invalid proxy urls such as "/a/b/c"
                # Chain explicitly so the parse error is recorded as the cause
                raise UnsupportedRequest(f'Invalid proxy url "{proxy_url}": {e}') from e

            scheme = urllib.parse.urlparse(proxy_url).scheme.lower()
            if scheme not in self._SUPPORTED_PROXY_SCHEMES:
                raise UnsupportedRequest(f'Unsupported proxy type: "{scheme}"')

    def _check_extensions(self, extensions):
        """Check extensions for unsupported extensions. Subclasses should extend this."""
        # Only the types are checked here; nothing is popped, so a subclass must pop
        # `cookiejar`/`timeout` itself for _validate() to accept them.
        assert isinstance(extensions.get('cookiejar'), (CookieJar, NoneType))
        assert isinstance(extensions.get('timeout'), (float, int, NoneType))

    def _validate(self, request):
        """Run the url scheme, proxy and extension checks on the request.

        Request proxies are checked if set, otherwise the handler's own proxies.
        Raises UnsupportedRequest if any check fails or any extension is left unhandled.
        """
        self._check_url_scheme(request)
        self._check_proxies(request.proxies or self.proxies)
        # Work on a copy so that popping handled extensions does not mutate the request
        extensions = request.extensions.copy()
        self._check_extensions(extensions)
        if extensions:
            # TODO: add support for optional extensions
            raise UnsupportedRequest(f'Unsupported extensions: {", ".join(extensions.keys())}')

    @wrap_request_errors
    def validate(self, request: Request):
        """Check whether this handler supports the request; see _validate() for the checks performed.

        A non-Request argument raises TypeError here.
        NOTE(review): wrap_request_errors may translate raised exceptions - confirm in ._helper.
        """
        if not isinstance(request, Request):
            raise TypeError('Expected an instance of Request')
        self._validate(request)

    @wrap_request_errors
    def send(self, request: Request) -> Response:
        """Send the request through _send() and return its result.

        A non-Request argument raises TypeError here.
        NOTE(review): wrap_request_errors may translate raised exceptions - confirm in ._helper.
        """
        if not isinstance(request, Request):
            raise TypeError('Expected an instance of Request')
        return self._send(request)

    @abc.abstractmethod
    def _send(self, request: Request):
        """Handle a request from start to finish. Redefine in subclasses."""

    def close(self):
        """Release handler resources. No-op by default."""
        pass

    @classproperty
    def RH_NAME(cls):
        """Display name: the class name without its last two characters (the "RH" suffix)."""
        return cls.__name__[:-2]

    @classproperty
    def RH_KEY(cls):
        """Registry key: the class name without the mandatory "RH" suffix."""
        assert cls.__name__.endswith('RH'), 'RequestHandler class names must end with "RH"'
        return cls.__name__[:-2]

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Leaving the context closes the handler
        self.close()
324 | ||
325 | ||
class Request:
    """
    Describes a single request to be sent by a RequestHandler.
    Partially backwards-compatible with urllib.request.Request.

    @param url: url to send. Will be sanitized.
    @param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
    @param headers: headers to send.
    @param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
    @param query: URL query parameters to update the url with.
    @param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
    @param extensions: Dictionary of Request extensions to add, as supported by handlers.
    """

    def __init__(
        self,
        url: str,
        data: RequestData = None,
        headers: typing.Mapping = None,
        proxies: dict = None,
        query: dict = None,
        method: str = None,
        extensions: dict = None
    ):

        # Backing fields must exist before the property setters below run
        self._headers = HTTPHeaderDict()
        self._data = None

        self.url = update_url_query(url, query) if query else url
        self.method = method
        if headers:
            self.headers = headers
        self.data = data  # note: must be done after setting headers
        self.proxies = proxies or {}
        self.extensions = extensions or {}

    @property
    def url(self):
        return self._url

    @url.setter
    def url(self, url):
        if not isinstance(url, str):
            raise TypeError('url must be a string')
        if url.startswith('//'):
            # Protocol-relative url: assume http
            url = f'http:{url}'
        self._url = escape_url(url)

    @property
    def method(self):
        # An explicit method wins; otherwise it is derived from whether a payload is set
        if self._method:
            return self._method
        return 'GET' if self.data is None else 'POST'

    @method.setter
    def method(self, method):
        if method is not None:
            if not isinstance(method, str):
                raise TypeError('method must be a string')
            method = method.upper()
        self._method = method

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data: RequestData):
        # Try catch some common mistakes
        if data is not None:
            acceptable = (
                isinstance(data, (bytes, io.IOBase, Iterable))
                and not isinstance(data, (str, Mapping)))
            if not acceptable:
                raise TypeError('data must be bytes, iterable of bytes, or a file-like object')

        current = self._data
        if data == current and current is None:
            self.headers.pop('Content-Length', None)

        # https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
        if data != current:
            if current is not None:
                # The previous payload's length no longer applies
                self.headers.pop('Content-Length', None)
            self._data = data

        if self._data is None:
            self.headers.pop('Content-Type', None)
        elif 'Content-Type' not in self.headers:
            self.headers['Content-Type'] = 'application/x-www-form-urlencoded'

    @property
    def headers(self) -> HTTPHeaderDict:
        return self._headers

    @headers.setter
    def headers(self, new_headers: Mapping):
        """Replaces headers of the request. If not an HTTPHeaderDict, it will be converted to one."""
        if not isinstance(new_headers, HTTPHeaderDict):
            if not isinstance(new_headers, Mapping):
                raise TypeError('headers must be a mapping')
            new_headers = HTTPHeaderDict(new_headers)
        self._headers = new_headers

    def update(self, url=None, data=None, headers=None, query=None):
        """Update the request in place; arguments left as None keep the current value.

        `headers` are merged into the existing headers and `query` is merged into the url.
        """
        if data is None:
            data = self.data
        self.data = data
        self.headers.update(headers or {})
        self.url = update_url_query(url or self.url, query or {})

    def copy(self):
        """Return a new request of the same class.

        Headers and proxies are deep-copied, extensions are shallow-copied,
        and the data object is shared with the original.
        """
        return self.__class__(
            url=self.url,
            data=self._data,
            headers=copy.deepcopy(self.headers),
            proxies=copy.deepcopy(self.proxies),
            method=self._method,
            extensions=copy.copy(self.extensions),
        )
445 | ||
446 | ||
# Convenience constructors: Request with the `method` keyword pre-bound.
# A caller-supplied `method=` keyword still overrides the pre-bound value.
HEADRequest = functools.partial(Request, method='HEAD')
PUTRequest = functools.partial(Request, method='PUT')
449 | ||
450 | ||
class Response(io.IOBase):
    """
    Base class for HTTP response adapters.

    Wraps a file-like response object and exposes its url, status, reason and headers.

    Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.

    @param fp: Original, file-like, response.
    @param url: URL that this is a response of.
    @param headers: response headers.
    @param status: Response HTTP status code. Default is 200 OK.
    @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
    """

    def __init__(
            self,
            fp: typing.IO,
            url: str,
            headers: Mapping[str, str],
            status: int = 200,
            reason: str = None):

        self.fp = fp
        # Headers are stored in an email Message, which keeps repeated header names
        self.headers = Message()
        for key, val in headers.items():
            self.headers.add_header(key, val)
        self.status = status
        self.url = url
        if not reason:
            try:
                reason = HTTPStatus(status).phrase
            except ValueError:
                # Status code unknown to HTTPStatus
                reason = None
        self.reason = reason

    def readable(self):
        return self.fp.readable()

    def read(self, amt: int = None) -> bytes:
        """Read from the wrapped file object; any failure is re-raised as TransportError."""
        # Expected errors raised here should be of type RequestError or subclasses.
        # Subclasses should redefine this method with more precise error handling.
        try:
            return self.fp.read(amt)
        except Exception as err:
            raise TransportError(cause=err) from err

    def close(self):
        """Close the wrapped file object, then this response."""
        self.fp.close()
        return super().close()

    def get_header(self, name, default=None):
        """Get header for name.
        If there are multiple matching headers, return all separated by comma."""
        values = self.headers.get_all(name)
        if not values:
            return default
        # Special case, only get the first one
        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
        return values[0] if name.title() == 'Set-Cookie' else ', '.join(values)

    # The following methods are for compatibility reasons and are deprecated
    @property
    def code(self):
        deprecation_warning('Response.code is deprecated, use Response.status', stacklevel=2)
        return self.status

    def getcode(self):
        deprecation_warning('Response.getcode() is deprecated, use Response.status', stacklevel=2)
        return self.status

    def geturl(self):
        deprecation_warning('Response.geturl() is deprecated, use Response.url', stacklevel=2)
        return self.url

    def info(self):
        deprecation_warning('Response.info() is deprecated, use Response.headers', stacklevel=2)
        return self.headers

    def getheader(self, name, default=None):
        deprecation_warning('Response.getheader() is deprecated, use Response.get_header', stacklevel=2)
        return self.get_header(name, default)