]>
Commit | Line | Data |
---|---|---|
e0df8241 JR |
1 | from __future__ import absolute_import |
2 | ||
3 | import errno | |
4 | import logging | |
5 | import re | |
6 | import socket | |
7 | import sys | |
8 | import warnings | |
9 | from socket import error as SocketError | |
10 | from socket import timeout as SocketTimeout | |
11 | ||
12 | from .connection import ( | |
13 | BaseSSLError, | |
14 | BrokenPipeError, | |
15 | DummyConnection, | |
16 | HTTPConnection, | |
17 | HTTPException, | |
18 | HTTPSConnection, | |
19 | VerifiedHTTPSConnection, | |
20 | port_by_scheme, | |
21 | ) | |
22 | from .exceptions import ( | |
23 | ClosedPoolError, | |
24 | EmptyPoolError, | |
25 | HeaderParsingError, | |
26 | HostChangedError, | |
27 | InsecureRequestWarning, | |
28 | LocationValueError, | |
29 | MaxRetryError, | |
30 | NewConnectionError, | |
31 | ProtocolError, | |
32 | ProxyError, | |
33 | ReadTimeoutError, | |
34 | SSLError, | |
35 | TimeoutError, | |
36 | ) | |
37 | from .packages import six | |
38 | from .packages.six.moves import queue | |
39 | from .request import RequestMethods | |
40 | from .response import HTTPResponse | |
41 | from .util.connection import is_connection_dropped | |
42 | from .util.proxy import connection_requires_http_tunnel | |
43 | from .util.queue import LifoQueue | |
44 | from .util.request import set_file_position | |
45 | from .util.response import assert_header_parsing | |
46 | from .util.retry import Retry | |
47 | from .util.ssl_match_hostname import CertificateError | |
48 | from .util.timeout import Timeout | |
49 | from .util.url import Url, _encode_target | |
50 | from .util.url import _normalize_host as normalize_host | |
51 | from .util.url import get_host, parse_url | |
52 | ||
53 | try: # Platform-specific: Python 3 | |
54 | import weakref | |
55 | ||
56 | weakref_finalize = weakref.finalize | |
57 | except AttributeError: # Platform-specific: Python 2 | |
58 | from .packages.backports.weakref_finalize import weakref_finalize | |
59 | ||
# Range alias taken from six so the same name works on Python 2 and 3.
xrange = six.moves.xrange

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Unique sentinel used as the default for ``timeout`` arguments in this
# module; it is compared by identity (``timeout is _Default``) so that an
# explicit ``None`` can still be passed as a real value.
_Default = object()
65 | ||
66 | ||
67 | # Pool objects | |
class ConnectionPool(object):
    """
    Common base class of every connection pool, i.e.
    :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.

    .. note::
       ConnectionPool.urlopen() does not normalize or percent-encode target URIs
       which is useful if your target server doesn't support percent-encoded
       target URIs.
    """

    # URL scheme served by the pool; concrete subclasses override it.
    scheme = None
    # Queue implementation used by subclasses to hold pooled connections.
    QueueCls = LifoQueue

    def __init__(self, host, port=None):
        """
        Store the (normalized) host and the port of the pool.

        :param host: Host name of the pool; must be non-empty.
        :param port: Port of the pool, or ``None``.
        :raises LocationValueError: if ``host`` is empty.
        """
        if not host:
            raise LocationValueError("No host specified.")

        # The normalized form is the one used for requests and comparisons,
        # the plain lower-cased form is kept separately for proxy use.
        self.host = _normalize_host(host, scheme=self.scheme)
        self._proxy_host = host.lower()
        self.port = port

    def __str__(self):
        description = (type(self).__name__, self.host, self.port)
        return "%s(host=%r, port=%r)" % description

    def __enter__(self):
        # The pool itself is the context-manager target.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always close the pool on exit. Returning False means an exception
        # raised inside the ``with`` body keeps propagating.
        self.close()
        return False

    def close(self):
        """
        Close all pooled connections and disable the pool.

        The base implementation does nothing; subclasses override it.
        """
106 | ||
107 | ||
# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
# errno values that HTTPConnectionPool._raise_timeout() treats as a read
# timeout when they show up on a socket error.
_blocking_errnos = {errno.EAGAIN, errno.EWOULDBLOCK}
110 | ||
111 | ||
112 | class HTTPConnectionPool(ConnectionPool, RequestMethods): | |
113 | """ | |
114 | Thread-safe connection pool for one host. | |
115 | ||
116 | :param host: | |
117 | Host used for this HTTP Connection (e.g. "localhost"), passed into | |
118 | :class:`http.client.HTTPConnection`. | |
119 | ||
120 | :param port: | |
121 | Port used for this HTTP Connection (None is equivalent to 80), passed | |
122 | into :class:`http.client.HTTPConnection`. | |
123 | ||
124 | :param strict: | |
125 | Causes BadStatusLine to be raised if the status line can't be parsed | |
126 | as a valid HTTP/1.0 or 1.1 status line, passed into | |
127 | :class:`http.client.HTTPConnection`. | |
128 | ||
129 | .. note:: | |
130 | Only works in Python 2. This parameter is ignored in Python 3. | |
131 | ||
132 | :param timeout: | |
133 | Socket timeout in seconds for each individual connection. This can | |
134 | be a float or integer, which sets the timeout for the HTTP request, | |
135 | or an instance of :class:`urllib3.util.Timeout` which gives you more | |
136 | fine-grained control over request timeouts. After the constructor has | |
137 | been parsed, this is always a `urllib3.util.Timeout` object. | |
138 | ||
139 | :param maxsize: | |
140 | Number of connections to save that can be reused. More than 1 is useful | |
141 | in multithreaded situations. If ``block`` is set to False, more | |
142 | connections will be created but they will not be saved once they've | |
143 | been used. | |
144 | ||
145 | :param block: | |
146 | If set to True, no more than ``maxsize`` connections will be used at | |
147 | a time. When no free connections are available, the call will block | |
148 | until a connection has been released. This is a useful side effect for | |
149 | particular multithreaded situations where one does not want to use more | |
150 | than maxsize connections per host to prevent flooding. | |
151 | ||
152 | :param headers: | |
153 | Headers to include with all requests, unless other headers are given | |
154 | explicitly. | |
155 | ||
156 | :param retries: | |
157 | Retry configuration to use by default with requests in this pool. | |
158 | ||
159 | :param _proxy: | |
160 | Parsed proxy URL, should not be used directly, instead, see | |
161 | :class:`urllib3.ProxyManager` | |
162 | ||
163 | :param _proxy_headers: | |
164 | A dictionary with proxy headers, should not be used directly, | |
165 | instead, see :class:`urllib3.ProxyManager` | |
166 | ||
167 | :param \\**conn_kw: | |
168 | Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, | |
169 | :class:`urllib3.connection.HTTPSConnection` instances. | |
170 | """ | |
171 | ||
172 | scheme = "http" | |
173 | ConnectionCls = HTTPConnection | |
174 | ResponseCls = HTTPResponse | |
175 | ||
def __init__(
    self,
    host,
    port=None,
    strict=False,
    timeout=Timeout.DEFAULT_TIMEOUT,
    maxsize=1,
    block=False,
    headers=None,
    retries=None,
    _proxy=None,
    _proxy_headers=None,
    _proxy_config=None,
    **conn_kw
):
    """
    Set up the pool state; see the class docstring for the meaning of
    the individual parameters.
    """
    ConnectionPool.__init__(self, host, port)
    RequestMethods.__init__(self, headers)

    self.strict = strict

    # Plain numbers are wrapped so ``self.timeout`` is always a Timeout.
    self.timeout = (
        timeout if isinstance(timeout, Timeout) else Timeout.from_float(timeout)
    )
    self.retries = Retry.DEFAULT if retries is None else retries

    self.pool = self.QueueCls(maxsize)
    # Fill the queue up so that doing get() on it will block properly
    for _ in xrange(maxsize):
        self.pool.put(None)
    self.block = block

    self.proxy = _proxy
    self.proxy_headers = _proxy_headers or {}
    self.proxy_config = _proxy_config

    # These are mostly for testing and debugging purposes.
    self.num_connections = 0
    self.num_requests = 0
    self.conn_kw = conn_kw

    if self.proxy:
        # Enable Nagle's algorithm for proxies, to avoid packet fragmentation.
        # A user-supplied "socket_options" value must not be replaced, hence
        # setdefault() rather than plain assignment.
        self.conn_kw.setdefault("socket_options", [])
        self.conn_kw.update(proxy=self.proxy, proxy_config=self.proxy_config)

    # Close all the HTTPConnections in the pool before the
    # HTTPConnectionPool object is garbage collected.
    #
    # Only the queue is handed to the finalizer, never 'self' (or a bound
    # method of it): a reference to 'self' held by the finalizer would keep
    # the pool object alive forever.
    weakref_finalize(self, _close_pool_connections, self.pool)
239 | ||
def _new_conn(self):
    """
    Create and return a brand new :class:`HTTPConnection` for this pool.
    """
    self.num_connections += 1
    shown_port = self.port or "80"
    log.debug(
        "Starting new HTTP connection (%d): %s:%s",
        self.num_connections,
        self.host,
        shown_port,
    )

    # Pool-level settings are passed explicitly, everything else comes
    # from the keyword arguments captured by the constructor.
    return self.ConnectionCls(
        host=self.host,
        port=self.port,
        timeout=self.timeout.connect_timeout,
        strict=self.strict,
        **self.conn_kw
    )
260 | ||
261 | def _get_conn(self, timeout=None): | |
262 | """ | |
263 | Get a connection. Will return a pooled connection if one is available. | |
264 | ||
265 | If no connections are available and :prop:`.block` is ``False``, then a | |
266 | fresh connection is returned. | |
267 | ||
268 | :param timeout: | |
269 | Seconds to wait before giving up and raising | |
270 | :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and | |
271 | :prop:`.block` is ``True``. | |
272 | """ | |
273 | conn = None | |
274 | try: | |
275 | conn = self.pool.get(block=self.block, timeout=timeout) | |
276 | ||
277 | except AttributeError: # self.pool is None | |
278 | raise ClosedPoolError(self, "Pool is closed.") | |
279 | ||
280 | except queue.Empty: | |
281 | if self.block: | |
282 | raise EmptyPoolError( | |
283 | self, | |
284 | "Pool reached maximum size and no more connections are allowed.", | |
285 | ) | |
286 | pass # Oh well, we'll create a new connection then | |
287 | ||
288 | # If this is a persistent connection, check if it got disconnected | |
289 | if conn and is_connection_dropped(conn): | |
290 | log.debug("Resetting dropped connection: %s", self.host) | |
291 | conn.close() | |
292 | if getattr(conn, "auto_open", 1) == 0: | |
293 | # This is a proxied connection that has been mutated by | |
294 | # http.client._tunnel() and cannot be reused (since it would | |
295 | # attempt to bypass the proxy) | |
296 | conn = None | |
297 | ||
298 | return conn or self._new_conn() | |
299 | ||
300 | def _put_conn(self, conn): | |
301 | """ | |
302 | Put a connection back into the pool. | |
303 | ||
304 | :param conn: | |
305 | Connection object for the current host and port as returned by | |
306 | :meth:`._new_conn` or :meth:`._get_conn`. | |
307 | ||
308 | If the pool is already full, the connection is closed and discarded | |
309 | because we exceeded maxsize. If connections are discarded frequently, | |
310 | then maxsize should be increased. | |
311 | ||
312 | If the pool is closed, then the connection will be closed and discarded. | |
313 | """ | |
314 | try: | |
315 | self.pool.put(conn, block=False) | |
316 | return # Everything is dandy, done. | |
317 | except AttributeError: | |
318 | # self.pool is None. | |
319 | pass | |
320 | except queue.Full: | |
321 | # This should never happen if self.block == True | |
322 | log.warning( | |
323 | "Connection pool is full, discarding connection: %s. Connection pool size: %s", | |
324 | self.host, | |
325 | self.pool.qsize(), | |
326 | ) | |
327 | # Connection never got put back into the pool, close it. | |
328 | if conn: | |
329 | conn.close() | |
330 | ||
331 | def _validate_conn(self, conn): | |
332 | """ | |
333 | Called right before a request is made, after the socket is created. | |
334 | """ | |
335 | pass | |
336 | ||
337 | def _prepare_proxy(self, conn): | |
338 | # Nothing to do for HTTP connections. | |
339 | pass | |
340 | ||
def _get_timeout(self, timeout):
    """Normalize ``timeout`` into a fresh :class:`urllib3.util.Timeout`."""
    # No per-request value given: use a copy of the pool default.
    if timeout is _Default:
        return self.timeout.clone()

    # User passed us an int/float. This is for backwards compatibility,
    # can be removed later
    if not isinstance(timeout, Timeout):
        return Timeout.from_float(timeout)

    return timeout.clone()
352 | ||
353 | def _raise_timeout(self, err, url, timeout_value): | |
354 | """Is the error actually a timeout? Will raise a ReadTimeout or pass""" | |
355 | ||
356 | if isinstance(err, SocketTimeout): | |
357 | raise ReadTimeoutError( | |
358 | self, url, "Read timed out. (read timeout=%s)" % timeout_value | |
359 | ) | |
360 | ||
361 | # See the above comment about EAGAIN in Python 3. In Python 2 we have | |
362 | # to specifically catch it and throw the timeout error | |
363 | if hasattr(err, "errno") and err.errno in _blocking_errnos: | |
364 | raise ReadTimeoutError( | |
365 | self, url, "Read timed out. (read timeout=%s)" % timeout_value | |
366 | ) | |
367 | ||
368 | # Catch possible read timeouts thrown as SSL errors. If not the | |
369 | # case, rethrow the original. We need to do this because of: | |
370 | # http://bugs.python.org/issue10272 | |
371 | if "timed out" in str(err) or "did not complete (read)" in str( | |
372 | err | |
373 | ): # Python < 2.7.4 | |
374 | raise ReadTimeoutError( | |
375 | self, url, "Read timed out. (read timeout=%s)" % timeout_value | |
376 | ) | |
377 | ||
def _make_request(
    self, conn, method, url, timeout=_Default, chunked=False, **httplib_request_kw
):
    """
    Perform a request on a given urllib connection object taken from our
    pool.

    :param conn:
        a connection from one of our connection pools

    :param method:
        HTTP method, handed to ``conn.request`` / ``conn.request_chunked``.

    :param url:
        Request target, handed to ``conn.request`` / ``conn.request_chunked``.

    :param timeout:
        Socket timeout in seconds for the request. This can be a
        float or integer, which will set the same timeout value for
        the socket connect and the socket read, or an instance of
        :class:`urllib3.util.Timeout`, which gives you more fine-grained
        control over your timeouts.

    :param chunked:
        If true the request is sent with ``conn.request_chunked``,
        otherwise with ``conn.request``.

    :param \\**httplib_request_kw:
        Extra keyword arguments forwarded unchanged to the request call.

    :returns:
        The response object returned by ``conn.getresponse()``.

    :raises ReadTimeoutError:
        If validating the connection or reading the response fails with an
        error that :meth:`_raise_timeout` recognizes as a timeout, or if the
        read timeout is zero on a connection that has a socket.
    """
    self.num_requests += 1

    # Start the connect clock and apply the connect timeout to the connection.
    timeout_obj = self._get_timeout(timeout)
    timeout_obj.start_connect()
    conn.timeout = Timeout.resolve_default_timeout(timeout_obj.connect_timeout)

    # Trigger any extra validation we need to do.
    try:
        self._validate_conn(conn)
    except (SocketTimeout, BaseSSLError) as e:
        # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
        # _raise_timeout() raises for timeouts; otherwise the bare ``raise``
        # below re-raises the original error.
        self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
        raise

    # conn.request() calls http.client.*.request, not the method in
    # urllib3.request. It also calls makefile (recv) on the socket.
    try:
        if chunked:
            conn.request_chunked(method, url, **httplib_request_kw)
        else:
            conn.request(method, url, **httplib_request_kw)

    # We are swallowing BrokenPipeError (errno.EPIPE) since the server is
    # legitimately able to close the connection after sending a valid response.
    # With this behaviour, the received response is still readable.
    except BrokenPipeError:
        # Python 3
        pass
    except IOError as e:
        # Python 2 and macOS/Linux
        # EPIPE and ESHUTDOWN are BrokenPipeError on Python 2, and EPROTOTYPE is needed on macOS
        # https://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/
        if e.errno not in {
            errno.EPIPE,
            errno.ESHUTDOWN,
            errno.EPROTOTYPE,
        }:
            raise

    # Reset the timeout for the recv() on the socket
    read_timeout = timeout_obj.read_timeout

    # App Engine doesn't have a sock attr
    if getattr(conn, "sock", None):
        # In Python 3 socket.py will catch EAGAIN and return None when you
        # try and read into the file pointer created by http.client, which
        # instead raises a BadStatusLine exception. Instead of catching
        # the exception and assuming all BadStatusLine exceptions are read
        # timeouts, check for a zero timeout before making the request.
        if read_timeout == 0:
            raise ReadTimeoutError(
                self, url, "Read timed out. (read timeout=%s)" % read_timeout
            )
        if read_timeout is Timeout.DEFAULT_TIMEOUT:
            conn.sock.settimeout(socket.getdefaulttimeout())
        else:  # None or a value
            conn.sock.settimeout(read_timeout)

    # Receive the response from the server
    try:
        try:
            # Python 2.7, use buffering of HTTP responses
            httplib_response = conn.getresponse(buffering=True)
        except TypeError:
            # Python 3
            try:
                httplib_response = conn.getresponse()
            except BaseException as e:
                # Remove the TypeError from the exception chain in
                # Python 3 (including for exceptions like SystemExit).
                # Otherwise it looks like a bug in the code.
                six.raise_from(e, None)
    except (SocketTimeout, BaseSSLError, SocketError) as e:
        # Timeouts become ReadTimeoutError; anything else is re-raised as is.
        self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
        raise

    # AppEngine doesn't have a version attr.
    http_version = getattr(conn, "_http_vsn_str", "HTTP/?")
    log.debug(
        '%s://%s:%s "%s %s %s" %s %s',
        self.scheme,
        self.host,
        self.port,
        method,
        url,
        http_version,
        httplib_response.status,
        httplib_response.length,
    )

    # Header parsing problems are only logged; the response is still returned.
    try:
        assert_header_parsing(httplib_response.msg)
    except (HeaderParsingError, TypeError) as hpe:  # Platform-specific: Python 3
        log.warning(
            "Failed to parse headers (url=%s): %s",
            self._absolute_url(url),
            hpe,
            exc_info=True,
        )

    return httplib_response
496 | ||
def _absolute_url(self, path):
    """Return ``path`` as a full URL string on this pool's scheme, host and port."""
    absolute = Url(scheme=self.scheme, host=self.host, port=self.port, path=path)
    return absolute.url
499 | ||
def close(self):
    """
    Close every pooled connection and disable the pool.

    Calling it again on an already closed pool is a no-op.
    """
    pool_to_close = self.pool
    if pool_to_close is None:
        return

    # Disable access to the pool
    self.pool = None

    # Close all the HTTPConnections in the pool.
    _close_pool_connections(pool_to_close)
511 | ||
def is_same_host(self, url):
    """
    Tell whether ``url`` points at the host this connection pool serves.

    URLs starting with ``/`` are always considered to be on the same host.
    """
    if url.startswith("/"):
        return True

    # TODO: Add optional support for socket.gethostbyname checking.
    url_scheme, url_host, url_port = get_host(url)
    if url_host is not None:
        url_host = _normalize_host(url_host, scheme=url_scheme)

    # Use explicit default port for comparison when none is given
    default_port = port_by_scheme.get(url_scheme)
    if self.port:
        if not url_port:
            url_port = default_port
    elif url_port == default_port:
        url_port = None

    target = (url_scheme, url_host, url_port)
    return target == (self.scheme, self.host, self.port)
532 | ||
def urlopen(
    self,
    method,
    url,
    body=None,
    headers=None,
    retries=None,
    redirect=True,
    assert_same_host=True,
    timeout=_Default,
    pool_timeout=None,
    release_conn=None,
    chunked=False,
    body_pos=None,
    **response_kw
):
    """
    Get a connection from the pool and perform an HTTP request. This is the
    lowest level call for making a request, so you'll need to specify all
    the raw details.

    .. note::

       More commonly, it's appropriate to use a convenience method provided
       by :class:`.RequestMethods`, such as :meth:`request`.

    .. note::

       `release_conn` will only behave as expected if
       `preload_content=False` because we want to make
       `preload_content=False` the default behaviour someday soon without
       breaking backwards compatibility.

    :param method:
        HTTP request method (such as GET, POST, PUT, etc.)

    :param url:
        The URL to perform the request on.

    :param body:
        Data to send in the request body, either :class:`str`, :class:`bytes`,
        an iterable of :class:`str`/:class:`bytes`, or a file-like object.

    :param headers:
        Dictionary of custom headers to send, such as User-Agent,
        If-None-Match, etc. If None, pool headers are used. If provided,
        these headers completely replace any pool-specific headers.

    :param retries:
        Configure the number of retries to allow before raising a
        :class:`~urllib3.exceptions.MaxRetryError` exception.

        Pass ``None`` to retry until you receive a response. Pass a
        :class:`~urllib3.util.retry.Retry` object for fine-grained control
        over different types of retries.
        Pass an integer number to retry connection errors that many times,
        but no other types of errors. Pass zero to never retry.

        If ``False``, then retries are disabled and any exception is raised
        immediately. Also, instead of raising a MaxRetryError on redirects,
        the redirect response will be returned.

    :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.

    :param redirect:
        If True, automatically handle redirects (status codes 301, 302,
        303, 307, 308). Each redirect counts as a retry. Disabling retries
        will disable redirect, too. When a 303 response is followed, the
        request is re-issued as ``GET`` without the original body and
        without the headers that describe that body.

    :param assert_same_host:
        If ``True``, will make sure that the host of the pool requests is
        consistent else will raise HostChangedError. When ``False``, you can
        use the pool on an HTTP proxy and request foreign hosts.

    :param timeout:
        If specified, overrides the default timeout for this one
        request. It may be a float (in seconds) or an instance of
        :class:`urllib3.util.Timeout`.

    :param pool_timeout:
        If set and the pool is set to block=True, then this method will
        block for ``pool_timeout`` seconds and raise EmptyPoolError if no
        connection is available within the time period.

    :param release_conn:
        If False, then the urlopen call will not release the connection
        back into the pool once a response is received (but will release if
        you read the entire contents of the response such as when
        `preload_content=True`). This is useful if you're not preloading
        the response's content immediately. You will need to call
        ``r.release_conn()`` on the response ``r`` to return the connection
        back into the pool. If None, it takes the value of
        ``response_kw.get('preload_content', True)``.

    :param chunked:
        If True, urllib3 will send the body using chunked transfer
        encoding. Otherwise, urllib3 will send the body using the standard
        content-length form. Defaults to False.

    :param int body_pos:
        Position to seek to in file-like body in the event of a retry or
        redirect. Typically this won't need to be set because urllib3 will
        auto-populate the value when needed.

    :param \\**response_kw:
        Additional parameters are passed to
        :meth:`urllib3.response.HTTPResponse.from_httplib`
    """

    parsed_url = parse_url(url)
    destination_scheme = parsed_url.scheme

    if headers is None:
        headers = self.headers

    if not isinstance(retries, Retry):
        retries = Retry.from_int(retries, redirect=redirect, default=self.retries)

    if release_conn is None:
        release_conn = response_kw.get("preload_content", True)

    # Check host
    if assert_same_host and not self.is_same_host(url):
        raise HostChangedError(self, url, retries)

    # Ensure that the URL we're connecting to is properly encoded
    if url.startswith("/"):
        url = six.ensure_str(_encode_target(url))
    else:
        url = six.ensure_str(parsed_url.url)

    conn = None

    # Track whether `conn` needs to be released before
    # returning/raising/recursing. Update this variable if necessary, and
    # leave `release_conn` constant throughout the function. That way, if
    # the function recurses, the original value of `release_conn` will be
    # passed down into the recursive call, and its value will be respected.
    #
    # See issue #651 [1] for details.
    #
    # [1] <https://github.com/urllib3/urllib3/issues/651>
    release_this_conn = release_conn

    http_tunnel_required = connection_requires_http_tunnel(
        self.proxy, self.proxy_config, destination_scheme
    )

    # Merge the proxy headers. Only done when not using HTTP CONNECT. We
    # have to copy the headers dict so we can safely change it without those
    # changes being reflected in anyone else's copy.
    if not http_tunnel_required:
        headers = headers.copy()
        headers.update(self.proxy_headers)

    # Must keep the exception bound to a separate variable or else Python 3
    # complains about UnboundLocalError.
    err = None

    # Keep track of whether we cleanly exited the except block. This
    # ensures we do proper cleanup in finally.
    clean_exit = False

    # Rewind body position, if needed. Record current position
    # for future rewinds in the event of a redirect/retry.
    body_pos = set_file_position(body, body_pos)

    try:
        # Request a connection from the queue.
        timeout_obj = self._get_timeout(timeout)
        conn = self._get_conn(timeout=pool_timeout)

        conn.timeout = timeout_obj.connect_timeout

        is_new_proxy_conn = self.proxy is not None and not getattr(
            conn, "sock", None
        )
        if is_new_proxy_conn and http_tunnel_required:
            self._prepare_proxy(conn)

        # Make the request on the httplib connection object.
        httplib_response = self._make_request(
            conn,
            method,
            url,
            timeout=timeout_obj,
            body=body,
            headers=headers,
            chunked=chunked,
        )

        # If we're going to release the connection in ``finally:``, then
        # the response doesn't need to know about the connection. Otherwise
        # it will also try to release it and we'll have a double-release
        # mess.
        response_conn = conn if not release_conn else None

        # Pass method to Response for length checking
        response_kw["request_method"] = method

        # Import httplib's response into our own wrapper object
        response = self.ResponseCls.from_httplib(
            httplib_response,
            pool=self,
            connection=response_conn,
            retries=retries,
            **response_kw
        )

        # Everything went great!
        clean_exit = True

    except EmptyPoolError:
        # Didn't get a connection from the pool, no need to clean up
        clean_exit = True
        release_this_conn = False
        raise

    except (
        TimeoutError,
        HTTPException,
        SocketError,
        ProtocolError,
        BaseSSLError,
        SSLError,
        CertificateError,
    ) as e:
        # Discard the connection for these exceptions. It will be
        # replaced during the next _get_conn() call.
        clean_exit = False

        def _is_ssl_error_message_from_http_proxy(ssl_error):
            # We're trying to detect the message 'WRONG_VERSION_NUMBER' but
            # SSLErrors are kinda all over the place when it comes to the message,
            # so we try to cover our bases here!
            message = " ".join(re.split("[^a-z]", str(ssl_error).lower()))
            return (
                "wrong version number" in message or "unknown protocol" in message
            )

        # Try to detect a common user error with proxies which is to
        # set an HTTP proxy to be HTTPS when it should be 'http://'
        # (ie {'http': 'http://proxy', 'https': 'https://proxy'})
        # Instead we add a nice error message and point to a URL.
        if (
            isinstance(e, BaseSSLError)
            and self.proxy
            and _is_ssl_error_message_from_http_proxy(e)
            and conn.proxy
            and conn.proxy.scheme == "https"
        ):
            e = ProxyError(
                "Your proxy appears to only use HTTP and not HTTPS, "
                "try changing your proxy URL to be HTTP. See: "
                "https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html"
                "#https-proxy-error-http-proxy",
                SSLError(e),
            )
        elif isinstance(e, (BaseSSLError, CertificateError)):
            e = SSLError(e)
        elif isinstance(e, (SocketError, NewConnectionError)) and self.proxy:
            e = ProxyError("Cannot connect to proxy.", e)
        elif isinstance(e, (SocketError, HTTPException)):
            e = ProtocolError("Connection aborted.", e)

        # increment() raises once the retry budget is exhausted, otherwise
        # it returns the Retry object to use for the next attempt.
        retries = retries.increment(
            method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
        )
        retries.sleep()

        # Keep track of the error for the retry warning.
        err = e

    finally:
        if not clean_exit:
            # We hit some kind of exception, handled or otherwise. We need
            # to throw the connection away unless explicitly told not to.
            # Close the connection, set the variable to None, and make sure
            # we put the None back in the pool to avoid leaking it.
            conn = conn and conn.close()
            release_this_conn = True

        if release_this_conn:
            # Put the connection back to be reused. If the connection is
            # expired then it will be None, which will get replaced with a
            # fresh connection during _get_conn.
            self._put_conn(conn)

    if not conn:
        # Try again
        log.warning(
            "Retrying (%r) after connection broken by '%r': %s", retries, err, url
        )
        return self.urlopen(
            method,
            url,
            body,
            headers,
            retries,
            redirect,
            assert_same_host,
            timeout=timeout,
            pool_timeout=pool_timeout,
            release_conn=release_conn,
            chunked=chunked,
            body_pos=body_pos,
            **response_kw
        )

    # Handle redirect?
    redirect_location = redirect and response.get_redirect_location()
    if redirect_location:
        if response.status == 303:
            # Change the method according to RFC 9110, Section 15.4.4.
            method = "GET"

            # A 303 turns the request into a body-less GET, so the original
            # payload must not be forwarded to the redirect target: it may
            # be sensitive and it no longer belongs to the request.
            body = None
            # The recorded position described the discarded body.
            body_pos = None

            # Drop the headers that only describe the discarded body. Work
            # on a copy so the caller's (or the pool's) mapping is untouched.
            content_specific_headers = frozenset(
                (
                    "content-encoding",
                    "content-language",
                    "content-location",
                    "content-type",
                    "content-length",
                    "digest",
                    "last-modified",
                )
            )
            headers = headers.copy()
            for header_name in list(headers):
                if header_name.lower() in content_specific_headers:
                    del headers[header_name]

        try:
            retries = retries.increment(method, url, response=response, _pool=self)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            return response

        response.drain_conn()
        retries.sleep_for_retry(response)
        log.debug("Redirecting %s -> %s", url, redirect_location)
        return self.urlopen(
            method,
            redirect_location,
            body,
            headers,
            retries=retries,
            redirect=redirect,
            assert_same_host=assert_same_host,
            timeout=timeout,
            pool_timeout=pool_timeout,
            release_conn=release_conn,
            chunked=chunked,
            body_pos=body_pos,
            **response_kw
        )

    # Check if we should retry the HTTP response.
    has_retry_after = bool(response.headers.get("Retry-After"))
    if retries.is_retry(method, response.status, has_retry_after):
        try:
            retries = retries.increment(method, url, response=response, _pool=self)
        except MaxRetryError:
            if retries.raise_on_status:
                response.drain_conn()
                raise
            return response

        response.drain_conn()
        retries.sleep(response)
        log.debug("Retry: %s", url)
        return self.urlopen(
            method,
            url,
            body,
            headers,
            retries=retries,
            redirect=redirect,
            assert_same_host=assert_same_host,
            timeout=timeout,
            pool_timeout=pool_timeout,
            release_conn=release_conn,
            chunked=chunked,
            body_pos=body_pos,
            **response_kw
        )

    return response
906 | ||
907 | ||
class HTTPSConnectionPool(HTTPConnectionPool):
    """
    HTTPS counterpart of :class:`.HTTPConnectionPool`.

    :class:`.HTTPSConnection` uses one of ``assert_fingerprint``,
    ``assert_hostname`` and ``host`` in this order to verify connections.
    If ``assert_hostname`` is False, no verification is done.

    The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``,
    ``ca_cert_dir``, ``ssl_version``, ``key_password`` are only used if :mod:`ssl`
    is available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade
    the connection socket into an SSL socket.
    """

    scheme = "https"
    ConnectionCls = HTTPSConnection

    def __init__(
        self,
        host,
        port=None,
        strict=False,
        timeout=Timeout.DEFAULT_TIMEOUT,
        maxsize=1,
        block=False,
        headers=None,
        retries=None,
        _proxy=None,
        _proxy_headers=None,
        key_file=None,
        cert_file=None,
        cert_reqs=None,
        key_password=None,
        ca_certs=None,
        ssl_version=None,
        assert_hostname=None,
        assert_fingerprint=None,
        ca_cert_dir=None,
        **conn_kw
    ):
        """
        Set up the pool through :class:`.HTTPConnectionPool` and remember the
        TLS-related settings that are later applied to each new connection.
        """
        # The generic pool arguments are forwarded positionally, unchanged.
        HTTPConnectionPool.__init__(
            self,
            host,
            port,
            strict,
            timeout,
            maxsize,
            block,
            headers,
            retries,
            _proxy,
            _proxy_headers,
            **conn_kw
        )

        # Client certificate material.
        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password

        # Server certificate verification settings.
        self.cert_reqs = cert_reqs
        self.ca_certs = ca_certs
        self.ca_cert_dir = ca_cert_dir
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint

        # Protocol version handed to verified connections.
        self.ssl_version = ssl_version

    def _prepare_conn(self, conn):
        """
        Copy this pool's certificate settings and ``ssl_version`` onto
        ``conn`` when it is a :class:`.VerifiedHTTPSConnection`; any other
        connection object is returned untouched.
        """
        if not isinstance(conn, VerifiedHTTPSConnection):
            return conn

        conn.set_cert(
            key_file=self.key_file,
            key_password=self.key_password,
            cert_file=self.cert_file,
            cert_reqs=self.cert_reqs,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            assert_hostname=self.assert_hostname,
            assert_fingerprint=self.assert_fingerprint,
        )
        conn.ssl_version = self.ssl_version
        return conn

    def _prepare_proxy(self, conn):
        """
        Establishes a tunnel connection through HTTP CONNECT.

        Tunnel connection is established early because otherwise httplib would
        improperly set Host: header to proxy's IP:port.
        """
        # Register the tunnel target and the headers sent with CONNECT.
        conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers)

        # Flag the connection when the proxy itself is reached over HTTPS.
        if self.proxy.scheme == "https":
            conn.tls_in_tls_required = True

        conn.connect()

    def _new_conn(self):
        """
        Return a fresh :class:`http.client.HTTPSConnection`.

        :raises SSLError: if ``ConnectionCls`` is unset or is the
            :class:`.DummyConnection` placeholder.
        """
        self.num_connections += 1
        log.debug(
            "Starting new HTTPS connection (%d): %s:%s",
            self.num_connections,
            self.host,
            self.port or "443",
        )

        connection_cls = self.ConnectionCls
        if not connection_cls or connection_cls is DummyConnection:
            raise SSLError(
                "Can't connect to HTTPS URL because the SSL module is not available."
            )

        # With a proxy configured, the socket is opened to the proxy rather
        # than to the pool's own host.
        if self.proxy is None:
            target_host, target_port = self.host, self.port
        else:
            target_host, target_port = self.proxy.host, self.proxy.port

        new_conn = connection_cls(
            host=target_host,
            port=target_port,
            timeout=self.timeout.connect_timeout,
            strict=self.strict,
            cert_file=self.cert_file,
            key_file=self.key_file,
            key_password=self.key_password,
            **self.conn_kw
        )

        return self._prepare_conn(new_conn)

    def _validate_conn(self, conn):
        """
        Called right before a request is made, after the socket is created.

        Connects ``conn`` if it has no socket yet, then emits an
        :class:`.InsecureRequestWarning` for an unverified connection and
        another one for an unverified HTTPS proxy.
        """
        super(HTTPSConnectionPool, self)._validate_conn(conn)

        # Force connect early to allow us to validate the connection.
        has_socket = getattr(conn, "sock", None)  # AppEngine might not have `.sock`
        if not has_socket:
            conn.connect()

        if not conn.is_verified:
            host_message = (
                "Unverified HTTPS request is being made to host '%s'. "
                "Adding certificate verification is strongly advised. See: "
                "https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html"
                "#ssl-warnings" % conn.host
            )
            warnings.warn(host_message, InsecureRequestWarning)

        # Only an explicit ``False`` counts; a missing attribute or ``None``
        # does not trigger the proxy warning.
        if getattr(conn, "proxy_is_verified", None) is False:
            proxy_message = (
                "Unverified HTTPS connection done to an HTTPS proxy. "
                "Adding certificate verification is strongly advised. See: "
                "https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html"
                "#ssl-warnings"
            )
            warnings.warn(proxy_message, InsecureRequestWarning)
1076 | ||
1077 | ||
def connection_from_url(url, **kw):
    """
    Given a url, return a :class:`.ConnectionPool` instance of its host.

    This is a shortcut for not having to parse out the scheme, host, and port
    of the url before creating a :class:`.ConnectionPool` instance.

    :param url:
        Absolute URL string that must include the scheme. Port is optional.

    :param \\**kw:
        Passes additional parameters to the constructor of the appropriate
        :class:`.ConnectionPool`. Useful for specifying things like
        timeout, maxsize, headers, etc.

    :return:
        An :class:`.HTTPSConnectionPool` when the scheme is ``"https"``,
        otherwise an :class:`.HTTPConnectionPool`.

    Example::

        >>> conn = connection_from_url('http://google.com/')
        >>> r = conn.request('GET', '/')
    """
    scheme, host, port = get_host(url)

    # No usable port in the URL: use the scheme's default, or 80 when the
    # scheme is not listed in ``port_by_scheme``.
    if not port:
        port = port_by_scheme.get(scheme, 80)

    pool_cls = HTTPSConnectionPool if scheme == "https" else HTTPConnectionPool
    return pool_cls(host, port=port, **kw)
1104 | ||
1105 | ||
def _normalize_host(host, scheme):
    """
    Normalize hosts for comparisons and use with sockets.

    :param host:
        Host to normalize. It is first passed through ``normalize_host``.
    :param scheme:
        URL scheme, forwarded to ``normalize_host`` together with ``host``.
    :return:
        The normalized host with one pair of enclosing square brackets
        removed, if present. A falsy result from ``normalize_host`` (for
        example ``None`` or an empty string) is returned as-is.
    """

    host = normalize_host(host, scheme)

    # httplib doesn't like it when we include brackets in IPv6 addresses
    # Specifically, if we include brackets but also pass the port then
    # httplib crazily doubles up the square brackets on the Host header.
    # Instead, we need to make sure we never pass ``None`` as the port.
    # However, for backward compatibility reasons we can't actually
    # *assert* that. See http://bugs.python.org/issue28539
    #
    # The leading truthiness check keeps the string methods from being
    # called on a falsy value such as ``None``.
    if host and host.startswith("[") and host.endswith("]"):
        host = host[1:-1]
    return host
1122 | ||
1123 | ||
def _close_pool_connections(pool):
    """
    Empty ``pool`` without blocking, closing every truthy item taken from it.

    Falsy entries (such as ``None``) are removed from the queue but not
    closed. The function returns once ``queue.Empty`` is raised.
    """
    while True:
        try:
            connection = pool.get(block=False)
            if connection:
                connection.close()
        except queue.Empty:
            break  # Done.