"""
requests.utils
~~~~~~~~~~~~~~

This module provides utility functions that are used within Requests
that are also useful for external consumption.
"""

import codecs
import contextlib
import io
import os
import re
import socket
import struct
import sys
import tempfile
import warnings
import zipfile
from collections import OrderedDict

from pip._vendor.urllib3.util import make_headers, parse_url

from . import certs
from .__version__ import __version__

# to_native_string is unused here, but imported here for backwards compatibility
from ._internal_utils import (  # noqa: F401
    _HEADER_VALIDATORS_BYTE,
    _HEADER_VALIDATORS_STR,
    HEADER_VALIDATORS,
    to_native_string,
)
from .compat import (
    Mapping,
    basestring,
    bytes,
    getproxies,
    getproxies_environment,
    integer_types,
)
from .compat import parse_http_list as _parse_list_header
from .compat import (
    proxy_bypass,
    proxy_bypass_environment,
    quote,
    str,
    unquote,
    urlparse,
    urlunparse,
)
from .cookies import cookiejar_from_dict
from .exceptions import (
    FileModeWarning,
    InvalidHeader,
    InvalidURL,
    UnrewindableBodyError,
)
from .structures import CaseInsensitiveDict

NETRC_FILES = (".netrc", "_netrc")

DEFAULT_CA_BUNDLE_PATH = certs.where()

DEFAULT_PORTS = {"http": 80, "https": 443}

# Ensure that ', ' is used to preserve previous delimiter behavior.
DEFAULT_ACCEPT_ENCODING = ", ".join(
    re.split(r",\s*", make_headers(accept_encoding=True)["accept-encoding"])
)
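
# For illustration (an assumption about the environment, not part of the
# module): without optional brotli/zstd support installed, urllib3's
# make_headers(accept_encoding=True) yields "gzip,deflate", which the join
# above normalizes to "gzip, deflate".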


if sys.platform == "win32":
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        try:
            import winreg
        except ImportError:
            return False

        try:
            internetSettings = winreg.OpenKey(
                winreg.HKEY_CURRENT_USER,
                r"Software\Microsoft\Windows\CurrentVersion\Internet Settings",
            )
            # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it
            proxyEnable = int(winreg.QueryValueEx(internetSettings, "ProxyEnable")[0])
            # ProxyOverride is almost always a string
            proxyOverride = winreg.QueryValueEx(internetSettings, "ProxyOverride")[0]
        except (OSError, ValueError):
            return False
        if not proxyEnable or not proxyOverride:
            return False

        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(";")
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == "<local>":
                if "." not in host:
                    return True
            test = test.replace(".", r"\.")  # mask dots
            test = test.replace("*", r".*")  # change glob sequence
            test = test.replace("?", r".")  # change glob char
            if re.match(test, host, re.I):
                return True
        return False
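
    # For illustration (hypothetical registry value, not part of the module):
    # an override entry such as "*.corp.example" becomes the regex
    # r".*\.corp\.example" after the replacements above, so it matches a host
    # like "build.corp.example" case-insensitively.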

    def proxy_bypass(host):  # noqa
        """Return True, if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)


def dict_to_sequence(d):
    """Returns an internal sequence dictionary update."""

    if hasattr(d, "items"):
        d = d.items()

    return d


def super_len(o):
    total_length = None
    current_position = 0

    if hasattr(o, "__len__"):
        total_length = len(o)

    elif hasattr(o, "len"):
        total_length = o.len

    elif hasattr(o, "fileno"):
        try:
            fileno = o.fileno()
        except (io.UnsupportedOperation, AttributeError):
            # AttributeError is a surprising exception, seeing as how we've just checked
            # that `hasattr(o, 'fileno')`. It happens for objects obtained via
            # `Tarfile.extractfile()`, per issue 5229.
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if "b" not in o.mode:
                warnings.warn(
                    (
                        "Requests has determined the content-length for this "
                        "request using the binary size of the file: however, the "
                        "file has been opened in text mode (i.e. without the 'b' "
                        "flag in the mode). This may lead to an incorrect "
                        "content-length. In Requests 3.0, support will be removed "
                        "for files in text mode."
                    ),
                    FileModeWarning,
                )

    if hasattr(o, "tell"):
        try:
            current_position = o.tell()
        except OSError:
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so set it to zero and
            # let requests chunk it instead.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, "seek") and total_length is None:
                # StringIO and BytesIO have seek but no usable fileno
                try:
                    # seek to end of file
                    o.seek(0, 2)
                    total_length = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(current_position or 0)
                except OSError:
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)
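
# A minimal usage sketch (not part of the original module): super_len reports
# the bytes *remaining* from the current read position.
#
#   >>> b = io.BytesIO(b"hello world")
#   >>> _ = b.read(6)
#   >>> super_len(b)
#   5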


def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc."""

    netrc_file = os.environ.get("NETRC")
    if netrc_file is not None:
        netrc_locations = (netrc_file,)
    else:
        netrc_locations = (f"~/{f}" for f in NETRC_FILES)

    try:
        from netrc import NetrcParseError, netrc

        netrc_path = None

        for f in netrc_locations:
            try:
                loc = os.path.expanduser(f)
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See https://bugs.python.org/issue20164 &
                # https://github.com/psf/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip port numbers from netloc. This weird `if...encode` dance is
        # used for Python 3.2, which doesn't support unicode literals.
        splitstr = b":"
        if isinstance(url, str):
            splitstr = splitstr.decode("ascii")
        host = ri.netloc.split(splitstr)[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = 0 if _netrc[0] else 1
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, OSError):
            # If there was a parsing error or a permissions issue reading the file,
            # we'll just skip netrc auth unless explicitly asked to raise errors.
            if raise_errors:
                raise

    # App Engine hackiness.
    except (ImportError, AttributeError):
        pass


def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, "name", None)
    if name and isinstance(name, basestring) and name[0] != "<" and name[-1] != ">":
        return os.path.basename(name)


def extract_zipped_paths(path):
    """Replace nonexistent paths that look like they refer to a member of a zip
    archive with the location of an extracted copy of the target, or else
    just return the provided path unchanged.
    """
    if os.path.exists(path):
        # this is already a valid path, no need to do anything further
        return path

    # find the first valid part of the provided path and treat that as a zip archive
    # assume the rest of the path is the name of a member in the archive
    archive, member = os.path.split(path)
    while archive and not os.path.exists(archive):
        archive, prefix = os.path.split(archive)
        if not prefix:
            # If we don't check for an empty prefix after the split (in other words, archive remains unchanged after the split),
            # we _can_ end up in an infinite loop on a rare corner case affecting a small number of users
            break
        member = "/".join([prefix, member])

    if not zipfile.is_zipfile(archive):
        return path

    zip_file = zipfile.ZipFile(archive)
    if member not in zip_file.namelist():
        return path

    # we have a valid zip archive and a valid member of that archive
    tmp = tempfile.gettempdir()
    extracted_path = os.path.join(tmp, member.split("/")[-1])
    if not os.path.exists(extracted_path):
        # use read + write to avoid creating nested folders, we only want the file, avoids mkdir racing condition
        with atomic_open(extracted_path) as file_handler:
            file_handler.write(zip_file.read(member))
    return extracted_path
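
# For illustration (hypothetical paths, not part of the module): given
# "/tmp/bundle.zip/certs/ca.pem", where "/tmp/bundle.zip" exists and is a zip
# archive containing "certs/ca.pem", the member is extracted into the system
# temp dir and that extracted path is returned; an existing path is returned
# unchanged.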


@contextlib.contextmanager
def atomic_open(filename):
    """Write a file to the disk in an atomic fashion"""
    tmp_descriptor, tmp_name = tempfile.mkstemp(dir=os.path.dirname(filename))
    try:
        with os.fdopen(tmp_descriptor, "wb") as tmp_handler:
            yield tmp_handler
        os.replace(tmp_name, filename)
    except BaseException:
        os.remove(tmp_name)
        raise
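
# A minimal usage sketch (hypothetical target path, not part of the module):
# because the temporary file is created in the same directory as the target,
# os.replace() is an atomic rename on the same filesystem, so readers never
# observe a half-written file.
#
#   >>> with atomic_open("/tmp/example.bin") as fh:  # doctest: +SKIP
#   ...     fh.write(b"payload")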


def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return an OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError("cannot encode objects that are not 2-tuples")

    return OrderedDict(value)


def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError("cannot encode objects that are not 2-tuples")

    if isinstance(value, Mapping):
        value = value.items()

    return list(value)


# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings. A quoted-string could
    contain a comma. A non-quoted string could have quotes in the
    middle. Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    result = []
    for item in _parse_list_header(value):
        if item[:1] == item[-1:] == '"':
            item = unquote_header_value(item[1:-1])
        result.append(item)
    return result


# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    result = {}
    for item in _parse_list_header(value):
        if "=" not in item:
            result[item] = None
            continue
        name, value = item.split("=", 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value. (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    :param value: the header value to unquote.
    :rtype: str
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well. IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes. Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly. See #458.
        if not is_filename or value[:2] != "\\\\":
            return value.replace("\\\\", "\\").replace('\\"', '"')
    return value
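
# For illustration (not part of the module):
#
#   >>> unquote_header_value('"token"')
#   'token'
#   >>> unquote_header_value(r'"va\"lue"')
#   'va"lue'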


def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.

    :rtype: dict
    """
    cookie_dict = {}

    for cookie in cj:
        cookie_dict[cookie.name] = cookie.value

    return cookie_dict


def add_dict_to_cookiejar(cj, cookie_dict):
    """Returns a CookieJar from a key/value dictionary.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.

    :rtype: CookieJar
    """

    return cookiejar_from_dict(cookie_dict, cj)


def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn(
        (
            "In requests 3.0, get_encodings_from_content will be removed. For "
            "more information, please see the discussion on issue #2266. (This"
            " warning should only appear once.)"
        ),
        DeprecationWarning,
    )

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (
        charset_re.findall(content)
        + pragma_re.findall(content)
        + xml_re.findall(content)
    )


def _parse_content_type_header(header):
    """Returns content type and parameters from given header

    :param header: string
    :return: tuple containing content type and dictionary of
        parameters
    """

    tokens = header.split(";")
    content_type, params = tokens[0].strip(), tokens[1:]
    params_dict = {}
    items_to_strip = "\"' "

    for param in params:
        param = param.strip()
        if param:
            key, value = param, True
            index_of_equals = param.find("=")
            if index_of_equals != -1:
                key = param[:index_of_equals].strip(items_to_strip)
                value = param[index_of_equals + 1 :].strip(items_to_strip)
            params_dict[key.lower()] = value
    return content_type, params_dict
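
# For illustration (not part of the module):
#
#   >>> _parse_content_type_header('text/html; charset="UTF-8"')
#   ('text/html', {'charset': 'UTF-8'})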


def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    :param headers: dictionary to extract encoding from.

    :rtype: str
    """

    content_type = headers.get("content-type")

    if not content_type:
        return None

    content_type, params = _parse_content_type_header(content_type)

    if "charset" in params:
        return params["charset"].strip("'\"")

    if "text" in content_type:
        return "ISO-8859-1"

    if "application/json" in content_type:
        # Assume UTF-8 based on RFC 4627: https://www.ietf.org/rfc/rfc4627.txt since the charset was unset
        return "utf-8"


def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator."""

    if r.encoding is None:
        yield from iterator
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors="replace")
    for chunk in iterator:
        rv = decoder.decode(chunk)
        if rv:
            yield rv
    rv = decoder.decode(b"", final=True)
    if rv:
        yield rv


def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    pos = 0
    if slice_length is None or slice_length <= 0:
        slice_length = len(string)
    while pos < len(string):
        yield string[pos : pos + slice_length]
        pos += slice_length


def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tried:

    1. charset from content-type
    2. fall back and replace all unicode characters

    :rtype: str
    """
    warnings.warn(
        (
            "In requests 3.0, get_unicode_from_response will be removed. For "
            "more information, please see the discussion on issue #2266. (This"
            " warning should only appear once.)"
        ),
        DeprecationWarning,
    )

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back:
    try:
        return str(r.content, encoding, errors="replace")
    except TypeError:
        return r.content


# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~"
)


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    :rtype: str
    """
    parts = uri.split("%")
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                raise InvalidURL(f"Invalid percent-escape sequence: '{h}'")

            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = f"%{parts[i]}"
        else:
            parts[i] = f"%{parts[i]}"
    return "".join(parts)
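
# For illustration (not part of the module): unreserved escapes are decoded
# while reserved ones are left encoded.
#
#   >>> unquote_unreserved("http://example.com/%7Euser/%2Fpath")
#   'http://example.com/~user/%2Fpath'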


def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)


def address_in_network(ip, net):
    """This function allows you to check if an IP belongs to a network subnet

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :rtype: bool
    """
    ipaddr = struct.unpack("=L", socket.inet_aton(ip))[0]
    netaddr, bits = net.split("/")
    netmask = struct.unpack("=L", socket.inet_aton(dotted_netmask(int(bits))))[0]
    network = struct.unpack("=L", socket.inet_aton(netaddr))[0] & netmask
    return (ipaddr & netmask) == (network & netmask)


def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    bits = 0xFFFFFFFF ^ (1 << 32 - mask) - 1
    return socket.inet_ntoa(struct.pack(">I", bits))


def is_ipv4_address(string_ip):
    """
    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
    except OSError:
        return False
    return True


def is_valid_cidr(string_network):
    """
    Very simple check of the cidr format in no_proxy variable.

    :rtype: bool
    """
    if string_network.count("/") == 1:
        try:
            mask = int(string_network.split("/")[1])
        except ValueError:
            return False

        if mask < 1 or mask > 32:
            return False

        try:
            socket.inet_aton(string_network.split("/")[0])
        except OSError:
            return False
    else:
        return False
    return True


@contextlib.contextmanager
def set_environ(env_name, value):
    """Set the environment variable 'env_name' to 'value'

    Save previous value, yield, and then restore the previous value stored in
    the environment variable 'env_name'.

    If 'value' is None, do nothing"""
    value_changed = value is not None
    if value_changed:
        old_value = os.environ.get(env_name)
        os.environ[env_name] = value
    try:
        yield
    finally:
        if value_changed:
            if old_value is None:
                del os.environ[env_name]
            else:
                os.environ[env_name] = old_value


def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :rtype: bool
    """

    # Prioritize lowercase environment variables over uppercase
    # to keep a consistent behaviour with other http projects (curl, wget).
    def get_proxy(key):
        return os.environ.get(key) or os.environ.get(key.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = get_proxy("no_proxy")
    parsed = urlparse(url)

    if parsed.hostname is None:
        # URLs don't always have hostnames, e.g. file:/// urls.
        return True

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the hostname, both with and without the port.
        no_proxy = (host for host in no_proxy.replace(" ", "").split(",") if host)

        if is_ipv4_address(parsed.hostname):
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(parsed.hostname, proxy_ip):
                        return True
                elif parsed.hostname == proxy_ip:
                    # If no_proxy ip was defined in plain IP notation instead of cidr notation &
                    # matches the IP of the index
                    return True
        else:
            host_with_port = parsed.hostname
            if parsed.port:
                host_with_port += f":{parsed.port}"

            for host in no_proxy:
                if parsed.hostname.endswith(host) or host_with_port.endswith(host):
                    # The URL does match something in no_proxy, so we don't want
                    # to apply the proxies on this URL.
                    return True

    with set_environ("no_proxy", no_proxy_arg):
        # parsed.hostname can be `None` in cases such as a file URI.
        try:
            bypass = proxy_bypass(parsed.hostname)
        except (TypeError, socket.gaierror):
            bypass = False

    if bypass:
        return True

    return False


def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    :rtype: dict
    """
    if should_bypass_proxies(url, no_proxy=no_proxy):
        return {}
    else:
        return getproxies()


def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The url being used for the request
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    if urlparts.hostname is None:
        return proxies.get(urlparts.scheme, proxies.get("all"))

    proxy_keys = [
        urlparts.scheme + "://" + urlparts.hostname,
        urlparts.scheme,
        "all://" + urlparts.hostname,
        "all",
    ]
    proxy = None
    for proxy_key in proxy_keys:
        if proxy_key in proxies:
            proxy = proxies[proxy_key]
            break

    return proxy
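
# For illustration (hypothetical mapping, not part of the module): the more
# specific "scheme://host" key takes precedence over the bare scheme key.
#
#   >>> proxies = {"http://example.com": "http://p1:3128", "http": "http://p2:3128"}
#   >>> select_proxy("http://example.com/path", proxies)
#   'http://p1:3128'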


def resolve_proxies(request, proxies, trust_env=True):
    """This method takes proxy information from a request and configuration
    input to resolve a mapping of target proxies. This will consider settings
    such as NO_PROXY to strip proxy configurations.

    :param request: Request or PreparedRequest
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    :param trust_env: Boolean declaring whether to trust environment configs

    :rtype: dict
    """
    proxies = proxies if proxies is not None else {}
    url = request.url
    scheme = urlparse(url).scheme
    no_proxy = proxies.get("no_proxy")
    new_proxies = proxies.copy()

    if trust_env and not should_bypass_proxies(url, no_proxy=no_proxy):
        environ_proxies = get_environ_proxies(url, no_proxy=no_proxy)

        proxy = environ_proxies.get(scheme, environ_proxies.get("all"))

        if proxy:
            new_proxies.setdefault(scheme, proxy)
    return new_proxies


def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    :rtype: str
    """
    return f"{name}/{__version__}"


def default_headers():
    """
    :rtype: requests.structures.CaseInsensitiveDict
    """
    return CaseInsensitiveDict(
        {
            "User-Agent": default_user_agent(),
            "Accept-Encoding": DEFAULT_ACCEPT_ENCODING,
            "Accept": "*/*",
            "Connection": "keep-alive",
        }
    )


def parse_header_links(value):
    """Return a list of parsed link headers, e.g.
    Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    :rtype: list
    """

    links = []

    replace_chars = " '\""

    value = value.strip(replace_chars)
    if not value:
        return links

    for val in re.split(", *<", value):
        try:
            url, params = val.split(";", 1)
        except ValueError:
            url, params = val, ""

        link = {"url": url.strip("<> '\"")}

        for param in params.split(";"):
            try:
                key, value = param.split("=")
            except ValueError:
                break

            link[key.strip(replace_chars)] = value.strip(replace_chars)

        links.append(link)

    return links
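
# For illustration (not part of the module):
#
#   >>> parse_header_links('<http://example.com/a>; rel="next"')
#   [{'url': 'http://example.com/a', 'rel': 'next'}]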


# Null bytes; no need to recreate these on each call to guess_json_utf
_null = "\x00".encode("ascii")  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """
    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return "utf-32"  # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return "utf-8-sig"  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return "utf-16"  # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return "utf-8"
    if nullcount == 2:
        if sample[::2] == _null2:  # 1st and 3rd are null
            return "utf-16-be"
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return "utf-16-le"
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return "utf-32-be"
        if sample[1:] == _null3:
            return "utf-32-le"
        # Did not detect a valid UTF-32 ascii-range character
    return None
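
# For illustration (not part of the module): "{}" encoded as UTF-16LE is
# b"{\x00}\x00", so the 2nd and 4th bytes are null and no BOM is present.
#
#   >>> guess_json_utf("{}".encode("utf-16-le"))
#   'utf-16-le'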


def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    parsed = parse_url(url)
    scheme, auth, host, port, path, query, fragment = parsed

    # A defect in urlparse determines that there isn't a netloc present in some
    # urls. We previously assumed parsing was overly cautious, and swapped the
    # netloc and path. Due to a lack of tests on the original defect, this is
    # maintained with parse_url for backwards compatibility.
    netloc = parsed.netloc
    if not netloc:
        netloc, path = path, netloc

    if auth:
        # parse_url doesn't provide the netloc with auth
        # so we'll add it ourselves.
        netloc = "@".join([auth, netloc])
    if scheme is None:
        scheme = new_scheme
    if path is None:
        path = ""

    return urlunparse((scheme, netloc, path, "", query, fragment))
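
# For illustration (not part of the module):
#
#   >>> prepend_scheme_if_needed("example.com/pictures", "http")
#   'http://example.com/pictures'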


def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple of
    username,password.

    :rtype: (str,str)
    """
    parsed = urlparse(url)

    try:
        auth = (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        auth = ("", "")

    return auth


def check_header_validity(header):
    """Verifies that header parts don't contain leading whitespace,
    reserved characters, or return characters.

    :param header: tuple, in the format (name, value).
    """
    name, value = header
    _validate_header_part(header, name, 0)
    _validate_header_part(header, value, 1)


def _validate_header_part(header, header_part, header_validator_index):
    if isinstance(header_part, str):
        validator = _HEADER_VALIDATORS_STR[header_validator_index]
    elif isinstance(header_part, bytes):
        validator = _HEADER_VALIDATORS_BYTE[header_validator_index]
    else:
        raise InvalidHeader(
            f"Header part ({header_part!r}) from {header} "
            f"must be of type str or bytes, not {type(header_part)}"
        )

    if not validator.match(header_part):
        header_kind = "name" if header_validator_index == 0 else "value"
        raise InvalidHeader(
            f"Invalid leading whitespace, reserved character(s), or return "
            f"character(s) in header {header_kind}: {header_part!r}"
        )


def urldefragauth(url):
    """
    Given a url remove the fragment and the authentication part.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    netloc = netloc.rsplit("@", 1)[-1]

    return urlunparse((scheme, netloc, path, params, query, ""))


def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.
    """
    body_seek = getattr(prepared_request.body, "seek", None)
    if body_seek is not None and isinstance(
        prepared_request._body_position, integer_types
    ):
        try:
            body_seek(prepared_request._body_position)
        except OSError:
            raise UnrewindableBodyError(
                "An error occurred when rewinding request body for redirect."
            )
    else:
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")