]>
Commit | Line | Data |
---|---|---|
227bf1a3 | 1 | from __future__ import annotations |
2 | ||
3 | import abc | |
4 | import copy | |
5 | import enum | |
6 | import functools | |
7 | import io | |
8 | import typing | |
9 | import urllib.parse | |
10 | import urllib.request | |
11 | import urllib.response | |
12 | from collections.abc import Iterable, Mapping | |
13 | from email.message import Message | |
14 | from http import HTTPStatus | |
15 | from http.cookiejar import CookieJar | |
16 | ||
17 | from ._helper import make_ssl_context, wrap_request_errors | |
18 | from .exceptions import ( | |
19 | NoSupportingHandlers, | |
20 | RequestError, | |
21 | TransportError, | |
22 | UnsupportedRequest, | |
23 | ) | |
86aea0d3 | 24 | from ..compat.types import NoneType |
227bf1a3 | 25 | from ..utils import ( |
26 | bug_reports_message, | |
27 | classproperty, | |
3d2623a8 | 28 | deprecation_warning, |
227bf1a3 | 29 | error_to_str, |
227bf1a3 | 30 | update_url_query, |
31 | ) | |
4bf91228 | 32 | from ..utils.networking import HTTPHeaderDict, normalize_url |
227bf1a3 | 33 | |
if typing.TYPE_CHECKING:
    # Annotation-only alias for the accepted request payload types (used by Request's
    # `data` parameter). Defined under TYPE_CHECKING, so it does not exist at runtime.
    RequestData = bytes | Iterable[bytes] | typing.IO | None
36 | ||
37 | ||
class RequestDirector:
    """Routes a Request to a registered RequestHandler that is able to serve it.

    Handlers are stored by their RH_KEY and are consulted in reverse order of
    registration until one both validates and sends the request.

    @param logger: Logger instance.
    @param verbose: Print debug request information to stdout.
    """

    def __init__(self, logger, verbose=False):
        self.handlers: dict[str, RequestHandler] = {}
        self.logger = logger  # TODO(Grub4k): default logger
        self.verbose = verbose

    def close(self):
        """Close every registered handler."""
        for rh in self.handlers.values():
            rh.close()

    def add_handler(self, handler: RequestHandler):
        """Register a handler instance; an existing handler with the same RH_KEY is replaced."""
        assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
        self.handlers[handler.RH_KEY] = handler

    def _print_verbose(self, msg):
        """Write a director-prefixed debug message to the logger when verbose is enabled."""
        if not self.verbose:
            return
        self.logger.stdout(f'director: {msg}')

    def send(self, request: Request) -> Response:
        """
        Hand the request to the first suitable RequestHandler and return its Response.

        Raises RequestError if no handlers are configured, re-raises any RequestError
        from the chosen handler, and raises NoSupportingHandlers if every handler
        either rejected the request or failed unexpectedly.
        """
        if not self.handlers:
            raise RequestError('No request handlers configured')

        assert isinstance(request, Request)

        unexpected_errors, unsupported_errors = [], []
        # TODO (future): add a per-request preference system
        # Snapshot the handlers so the most recently added one is tried first
        candidates = list(self.handlers.values())
        for rh in reversed(candidates):
            rh_name = rh.RH_NAME
            self._print_verbose(f'Checking if "{rh_name}" supports this request.')
            try:
                rh.validate(request)
            except UnsupportedRequest as err:
                self._print_verbose(
                    f'"{rh_name}" cannot handle this request (reason: {error_to_str(err)})')
                unsupported_errors.append(err)
                continue

            self._print_verbose(f'Sending request via "{rh_name}"')
            try:
                response = rh.send(request)
            except RequestError:
                # Expected failure from the handler: surface it to the caller unchanged
                raise
            except Exception as err:
                # Anything else is treated as a handler bug; report it and try the next handler
                self.logger.error(
                    f'[{rh_name}] Unexpected error: {error_to_str(err)}{bug_reports_message()}',
                    is_error=False)
                unexpected_errors.append(err)
            else:
                assert isinstance(response, Response)
                return response

        raise NoSupportingHandlers(unsupported_errors, unexpected_errors)
103 | ||
104 | ||
# Registry of RequestHandler classes, keyed by their RH_KEY
_REQUEST_HANDLERS = {}


def register_rh(handler):
    """Add a RequestHandler subclass to the registry and return it unchanged.

    Returning the class allows this function to be applied as a class decorator.
    Registering a non-RequestHandler class, or a second class with the same RH_KEY,
    trips an assertion.
    """
    assert issubclass(handler, RequestHandler), f'{handler} must be a subclass of RequestHandler'
    rh_key = handler.RH_KEY
    assert rh_key not in _REQUEST_HANDLERS, f'RequestHandler {rh_key} already registered'
    _REQUEST_HANDLERS[rh_key] = handler
    return handler
114 | ||
115 | ||
class Features(enum.Enum):
    """Optional capabilities a RequestHandler may list in its `_SUPPORTED_FEATURES`."""
    # Support for the "all" key of a proxies dict (checked in RequestHandler._check_proxies)
    ALL_PROXY = enum.auto()
    # Support for the "no" key of a proxies dict (checked in RequestHandler._check_proxies)
    NO_PROXY = enum.auto()
119 | ||
120 | ||
class RequestHandler(abc.ABC):

    """Request Handler class

    Request handlers are classes that, given a Request,
    process the request from start to finish and return a Response.

    Concrete subclasses need to redefine the _send(request) method,
    which handles the underlying request logic and returns a Response.

    RH_NAME class variable may contain a display name for the RequestHandler.
    By default, this is generated from the class name.

    The concrete request handler MUST have "RH" as the suffix in the class name.

    All exceptions raised by a RequestHandler should be an instance of RequestError.
    Any other exception raised will be treated as a handler issue.

    If a Request is not supported by the handler, an UnsupportedRequest
    should be raised with a reason.

    By default, some checks are done on the request in _validate() based on the following class variables:
    - `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
        Any Request with an url scheme not in this list will raise an UnsupportedRequest.

    - `_SUPPORTED_PROXY_SCHEMES`: a tuple of supported proxy url schemes. Any Request that contains
        a proxy url with an url scheme not in this list will raise an UnsupportedRequest.

    - `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.

    The above may be set to None to disable the checks.

    Parameters:
    @param logger: logger instance
    @param headers: HTTP Headers to include when sending requests.
    @param cookiejar: Cookiejar to use for requests.
    @param timeout: Socket timeout to use when sending requests.
    @param proxies: Proxies to use for sending requests.
    @param source_address: Client-side IP address to bind to for requests.
    @param verbose: Print debug request and traffic information to stdout.
    @param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
    @param client_cert: SSL client certificate configuration.
            dict with {client_certificate, client_certificate_key, client_certificate_password}
    @param verify: Verify SSL certificates
    @param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.

    Some configuration options may be available for individual Requests too. In this case,
    either the Request configuration option takes precedence or they are merged.

    Requests may have additional optional parameters defined as extensions.
    RequestHandler subclasses may choose to support custom extensions.

    If an extension is supported, subclasses should extend _check_extensions(extensions)
    to pop and validate the extension.
    - Extensions left in `extensions` are treated as unsupported and UnsupportedRequest will be raised.

    The following extensions are defined for RequestHandler:
    - `cookiejar`: Cookiejar to use for this request.
    - `timeout`: socket timeout to use for this request.
    To enable these, add extensions.pop('<extension>', None) to _check_extensions

    Apart from the url protocol, proxies dict may contain the following keys:
    - `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
    - `no`: comma separated list of hostnames (optionally with port) to not use a proxy for.
    Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.

    """

    _SUPPORTED_URL_SCHEMES = ()
    _SUPPORTED_PROXY_SCHEMES = ()
    _SUPPORTED_FEATURES = ()

    def __init__(
        self, *,
        logger,  # TODO(Grub4k): default logger
        headers: HTTPHeaderDict = None,
        cookiejar: CookieJar = None,
        timeout: float | int | None = None,
        proxies: dict = None,
        source_address: str = None,
        verbose: bool = False,
        prefer_system_certs: bool = False,
        client_cert: dict[str, str | None] = None,
        verify: bool = True,
        legacy_ssl_support: bool = False,
        **_,
    ):

        self._logger = logger
        self.headers = headers or {}
        # An explicitly passed (even empty) cookiejar is kept; only None creates a new one
        self.cookiejar = cookiejar if cookiejar is not None else CookieJar()
        # A falsy timeout (None or 0) falls back to 20
        self.timeout = float(timeout or 20)
        self.proxies = proxies or {}
        self.source_address = source_address
        self.verbose = verbose
        self.prefer_system_certs = prefer_system_certs
        self._client_cert = client_cert or {}
        self.verify = verify
        self.legacy_ssl_support = legacy_ssl_support
        super().__init__()

    def _make_sslcontext(self):
        """Build an SSL context via make_ssl_context from this handler's SSL settings."""
        return make_ssl_context(
            verify=self.verify,
            legacy_support=self.legacy_ssl_support,
            use_certifi=not self.prefer_system_certs,
            **self._client_cert,
        )

    def _merge_headers(self, request_headers):
        """Return an HTTPHeaderDict built from the handler headers and then the request headers."""
        return HTTPHeaderDict(self.headers, request_headers)

    def _check_url_scheme(self, request: Request):
        """Return the lowercased url scheme of the request.

        Raises UnsupportedRequest if `_SUPPORTED_URL_SCHEMES` is not None and does not contain it.
        """
        scheme = urllib.parse.urlparse(request.url).scheme.lower()
        if self._SUPPORTED_URL_SCHEMES is not None and scheme not in self._SUPPORTED_URL_SCHEMES:
            raise UnsupportedRequest(f'Unsupported url scheme: "{scheme}"')
        return scheme  # for further processing

    def _check_proxies(self, proxies):
        """Validate a proxies dict against the features and schemes this handler supports.

        Entries whose value is None are ignored. Raises UnsupportedRequest for an
        unsupported "no"/"all" entry, an unparsable or scheme-less proxy url, or a
        proxy scheme that is not in `_SUPPORTED_PROXY_SCHEMES`.
        """
        for proxy_key, proxy_url in proxies.items():
            if proxy_url is None:
                continue
            if proxy_key == 'no':
                if self._SUPPORTED_FEATURES is not None and Features.NO_PROXY not in self._SUPPORTED_FEATURES:
                    raise UnsupportedRequest('"no" proxy is not supported')
                continue
            if (
                proxy_key == 'all'
                and self._SUPPORTED_FEATURES is not None
                and Features.ALL_PROXY not in self._SUPPORTED_FEATURES
            ):
                raise UnsupportedRequest('"all" proxy is not supported')

            # Unlikely this handler will use this proxy, so ignore.
            # This is to allow a case where a proxy may be set for a protocol
            # for one handler in which such protocol (and proxy) is not supported by another handler.
            if self._SUPPORTED_URL_SCHEMES is not None and proxy_key not in (*self._SUPPORTED_URL_SCHEMES, 'all'):
                continue

            if self._SUPPORTED_PROXY_SCHEMES is None:
                # Skip proxy scheme checks
                continue

            # Only the parsing call lives in the try body, so the ValueError handler
            # cannot interfere with the explicit "missing scheme" error raised below.
            try:
                parsed_scheme = urllib.request._parse_proxy(proxy_url)[0]
            except ValueError as e:
                # parse_proxy may raise on some invalid proxy urls such as "/a/b/c"
                raise UnsupportedRequest(f'Invalid proxy url "{proxy_url}": {e}') from e
            if parsed_scheme is None:
                # Scheme-less proxies are not supported
                raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')

            scheme = urllib.parse.urlparse(proxy_url).scheme.lower()
            if scheme not in self._SUPPORTED_PROXY_SCHEMES:
                raise UnsupportedRequest(f'Unsupported proxy type: "{scheme}"')

    def _check_extensions(self, extensions):
        """Check extensions for unsupported extensions. Subclasses should extend this."""
        assert isinstance(extensions.get('cookiejar'), (CookieJar, NoneType))
        assert isinstance(extensions.get('timeout'), (float, int, NoneType))

    def _validate(self, request):
        """Run the url scheme, proxy and extension checks; raise UnsupportedRequest on failure."""
        self._check_url_scheme(request)
        # Request proxies, when non-empty, are checked instead of the handler's own
        self._check_proxies(request.proxies or self.proxies)
        # Work on a copy: _check_extensions is expected to pop what it supports
        extensions = request.extensions.copy()
        self._check_extensions(extensions)
        if extensions:
            # TODO: add support for optional extensions
            raise UnsupportedRequest(f'Unsupported extensions: {", ".join(extensions.keys())}')

    @wrap_request_errors
    def validate(self, request: Request):
        """Check whether this handler supports the request. Raises TypeError for non-Request input."""
        if not isinstance(request, Request):
            raise TypeError('Expected an instance of Request')
        self._validate(request)

    @wrap_request_errors
    def send(self, request: Request) -> Response:
        """Send the request through _send(). Raises TypeError for non-Request input."""
        if not isinstance(request, Request):
            raise TypeError('Expected an instance of Request')
        return self._send(request)

    @abc.abstractmethod
    def _send(self, request: Request):
        """Handle a request from start to finish. Redefine in subclasses."""

    def close(self):
        """Release handler resources. No-op by default."""
        pass

    @classproperty
    def RH_NAME(cls):
        """Display name: the class name without its last two characters (the "RH" suffix)."""
        return cls.__name__[:-2]

    @classproperty
    def RH_KEY(cls):
        """Registry key: the class name without the mandatory "RH" suffix."""
        assert cls.__name__.endswith('RH'), 'RequestHandler class names must end with "RH"'
        return cls.__name__[:-2]

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
323 | ||
324 | ||
class Request:
    """
    Describes a single request that a RequestHandler can send.
    Partially backwards-compatible with urllib.request.Request.

    @param url: url to send. Will be sanitized.
    @param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
    @param headers: headers to send.
    @param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
    @param query: URL query parameters to update the url with.
    @param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
    @param extensions: Dictionary of Request extensions to add, as supported by handlers.
    """

    def __init__(
        self,
        url: str,
        data: RequestData = None,
        headers: typing.Mapping = None,
        proxies: dict = None,
        query: dict = None,
        method: str = None,
        extensions: dict = None
    ):

        # Backing fields must exist before the property setters below run
        self._headers = HTTPHeaderDict()
        self._data = None

        self.url = update_url_query(url, query) if query else url
        self.method = method
        if headers:
            self.headers = headers
        self.data = data  # note: must be done after setting headers
        self.proxies = proxies or {}
        self.extensions = extensions or {}

    @property
    def url(self):
        return self._url

    @url.setter
    def url(self, url):
        if not isinstance(url, str):
            raise TypeError('url must be a string')
        if url.startswith('//'):
            # Scheme-relative urls are sent over plain http
            url = f'http:{url}'
        self._url = normalize_url(url)

    @property
    def method(self):
        # An explicitly set method wins; otherwise it is derived from whether a payload is present
        if self._method:
            return self._method
        return 'GET' if self.data is None else 'POST'

    @method.setter
    def method(self, method):
        if method is not None and not isinstance(method, str):
            raise TypeError('method must be a string')
        self._method = None if method is None else method.upper()

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data: RequestData):
        # Try catch some common mistakes
        if data is not None:
            acceptable = (
                isinstance(data, (bytes, io.IOBase, Iterable))
                and not isinstance(data, (str, Mapping))
            )
            if not acceptable:
                raise TypeError('data must be bytes, iterable of bytes, or a file-like object')

        if data == self._data and self._data is None:
            self.headers.pop('Content-Length', None)

        # https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
        if data != self._data:
            if self._data is not None:
                self.headers.pop('Content-Length', None)
            self._data = data

        if self._data is None:
            # No payload: a Content-Type header is dropped
            self.headers.pop('Content-Type', None)
        elif 'Content-Type' not in self.headers:
            # Payload without an explicit type gets the form-encoded default
            self.headers['Content-Type'] = 'application/x-www-form-urlencoded'

    @property
    def headers(self) -> HTTPHeaderDict:
        return self._headers

    @headers.setter
    def headers(self, new_headers: Mapping):
        """Replaces headers of the request. If not a HTTPHeaderDict, it will be converted to one."""
        if isinstance(new_headers, HTTPHeaderDict):
            self._headers = new_headers
            return
        if not isinstance(new_headers, Mapping):
            raise TypeError('headers must be a mapping')
        self._headers = HTTPHeaderDict(new_headers)

    def update(self, url=None, data=None, headers=None, query=None):
        """Update the request in place; arguments left as None keep the current value."""
        # The data setter is always invoked, even when the payload is unchanged
        new_data = self.data if data is None else data
        self.data = new_data
        self.headers.update(headers or {})
        self.url = update_url_query(url or self.url, query or {})

    def copy(self):
        """Return a new request of the same class; headers and proxies are deep-copied."""
        clone_kwargs = {
            'url': self.url,
            'headers': copy.deepcopy(self.headers),
            'proxies': copy.deepcopy(self.proxies),
            'data': self._data,
            'extensions': copy.copy(self.extensions),
            'method': self._method,
        }
        return self.__class__(**clone_kwargs)
444 | ||
445 | ||
# Convenience constructors: Request with the HTTP method preset via functools.partial
HEADRequest = functools.partial(Request, method='HEAD')
PUTRequest = functools.partial(Request, method='PUT')
448 | ||
449 | ||
class Response(io.IOBase):
    """
    Base class for HTTP response adapters.

    Out of the box it is a thin wrapper around a file-like response object.

    Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.

    @param fp: Original, file-like, response.
    @param url: URL that this is a response of.
    @param headers: response headers.
    @param status: Response HTTP status code. Default is 200 OK.
    @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
    """

    def __init__(
            self,
            fp: typing.IO,
            url: str,
            headers: Mapping[str, str],
            status: int = 200,
            reason: str = None):

        self.fp = fp
        self.url = url
        self.status = status

        message = Message()
        for key, val in headers.items():
            message.add_header(key, val)
        self.headers = message

        if reason:
            self.reason = reason
        else:
            try:
                self.reason = HTTPStatus(status).phrase
            except ValueError:
                # Status code unknown to http.HTTPStatus: no built-in phrase available
                self.reason = None

    def readable(self):
        return self.fp.readable()

    def read(self, amt: int = None) -> bytes:
        """Read from the wrapped file object; any failure is re-raised as TransportError."""
        # Expected errors raised here should be of type RequestError or subclasses.
        # Subclasses should redefine this method with more precise error handling.
        try:
            chunk = self.fp.read(amt)
        except Exception as e:
            raise TransportError(cause=e) from e
        return chunk

    def close(self):
        """Close the wrapped file object, then this IO object."""
        self.fp.close()
        return super().close()

    def get_header(self, name, default=None):
        """Get header for name.
        If there are multiple matching headers, return all separated by comma."""
        values = self.headers.get_all(name)
        if not values:
            return default
        # Special case, only get the first one
        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
        return values[0] if name.title() == 'Set-Cookie' else ', '.join(values)

    # The following methods are for compatibility reasons and are deprecated
    @property
    def code(self):
        deprecation_warning('Response.code is deprecated, use Response.status', stacklevel=2)
        return self.status

    def getcode(self):
        deprecation_warning('Response.getcode() is deprecated, use Response.status', stacklevel=2)
        return self.status

    def geturl(self):
        deprecation_warning('Response.geturl() is deprecated, use Response.url', stacklevel=2)
        return self.url

    def info(self):
        deprecation_warning('Response.info() is deprecated, use Response.headers', stacklevel=2)
        return self.headers

    def getheader(self, name, default=None):
        deprecation_warning('Response.getheader() is deprecated, use Response.get_header', stacklevel=2)
        return self.get_header(name, default)