# Source: jfr.im git - dlqueue.git/blob - venv/lib/python3.11/site-packages/werkzeug/urls.py
from __future__ import annotations

import codecs
import re
import typing as t
from urllib.parse import quote
from urllib.parse import unquote
from urllib.parse import urlencode
from urllib.parse import urlsplit
from urllib.parse import urlunsplit

from .datastructures import iter_multi_items
15 def _codec_error_url_quote(e
: UnicodeError) -> tuple[str, int]:
16 """Used in :func:`uri_to_iri` after unquoting to re-quote any
19 # the docs state that UnicodeError does have these attributes,
20 # but mypy isn't picking them up
21 out
= quote(e
.object[e
.start
: e
.end
], safe
="") # type: ignore
22 return out
, e
.end
# type: ignore
# Make the handler available to decoders under the name "werkzeug.url_quote".
codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)
28 def _make_unquote_part(name
: str, chars
: str) -> t
.Callable
[[str], str]:
29 """Create a function that unquotes all percent encoded characters except those
30 given. This allows working with unquoted characters if possible while not changing
31 the meaning of a given part of a URL.
33 choices
= "|".join(f
"{ord(c):02X}" for c
in sorted(chars
))
34 pattern
= re
.compile(f
"((?:%(?:{choices}))+)", re
.I
)
36 def _unquote_partial(value
: str) -> str:
37 parts
= iter(pattern
.split(value
))
41 out
.append(unquote(part
, "utf-8", "werkzeug.url_quote"))
42 out
.append(next(parts
, ""))
46 _unquote_partial
.__name
__ = f
"_unquote_{name}"
47 return _unquote_partial
# characters that should remain quoted in URL parts
# based on https://url.spec.whatwg.org/#percent-encoded-bytes
# always keep all controls, space, and % quoted
_always_unsafe = bytes((*range(0x21), 0x25, 0x7F)).decode()
# Each URL part additionally keeps its own delimiter characters quoted.
_unquote_fragment = _make_unquote_part("fragment", _always_unsafe)
_unquote_query = _make_unquote_part("query", _always_unsafe + "&=+#")
_unquote_path = _make_unquote_part("path", _always_unsafe + "/?#")
_unquote_user = _make_unquote_part("user", _always_unsafe + ":@/?#")
def uri_to_iri(uri: str) -> str:
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    :param uri: The URI to convert.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters,
        are removed.

    .. versionchanged:: 2.3
        Which characters remain quoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved and invalid characters remain quoted. Previously,
        only some reserved characters were preserved, and invalid bytes
        were replaced instead of left quoted.
    """
    # NOTE(review): the initial split and the guard conditions below were
    # missing from the extracted text; they are restored to mirror the
    # structure of ``iri_to_uri`` -- confirm against upstream.
    parts = urlsplit(uri)
    path = _unquote_path(parts.path)
    query = _unquote_query(parts.query)
    fragment = _unquote_fragment(parts.fragment)

    if parts.hostname:
        netloc = _decode_idna(parts.hostname)
    else:
        netloc = ""

    # A colon in the host is wrapped in brackets before a port is appended.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    if parts.port:
        netloc = f"{netloc}:{parts.port}"

    if parts.username:
        auth = _unquote_user(parts.username)

        if parts.password:
            password = _unquote_user(parts.password)
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))
def iri_to_uri(iri: str) -> str:
    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
    quoted. If the URL has a domain, it is encoded to Punycode.

    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'

    :param iri: The IRI to convert.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, the ``charset`` and ``errors`` parameters,
        and the ``safe_conversion`` parameter, are removed.

    .. versionchanged:: 2.3
        Which characters remain unquoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved characters remain unquoted. Previously, only some reserved
        characters were left unquoted.

    .. versionchanged:: 0.9.6
        The ``safe_conversion`` parameter was added.

    .. versionadded:: 0.6
    """
    # NOTE(review): the guard conditions below were missing from the
    # extracted text and are restored here; confirm against upstream.
    parts = urlsplit(iri)
    # safe = https://url.spec.whatwg.org/#url-path-segment-string
    # as well as percent for things that are already quoted
    path = quote(parts.path, safe="%!$&'()*+,/:;=@")
    query = quote(parts.query, safe="%!$&'()*+,/:;=?@")
    fragment = quote(parts.fragment, safe="%!#$&'()*+,/:;=?@")

    if parts.hostname:
        netloc = parts.hostname.encode("idna").decode("ascii")
    else:
        netloc = ""

    # A colon in the host is wrapped in brackets before a port is appended.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    if parts.port:
        netloc = f"{netloc}:{parts.port}"

    if parts.username:
        auth = quote(parts.username, safe="%!$&'()*+,;=")

        if parts.password:
            password = quote(parts.password, safe="%!$&'()*+,;=")
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))
167 def _invalid_iri_to_uri(iri
: str) -> str:
168 """The URL scheme ``itms-services://`` must contain the ``//`` even though it does
169 not have a host component. There may be other invalid schemes as well. Currently,
170 responses will always call ``iri_to_uri`` on the redirect ``Location`` header, which
171 removes the ``//``. For now, if the IRI only contains ASCII and does not contain
172 spaces, pass it on as-is. In Werkzeug 3.0, this should become a
173 ``response.process_location`` flag.
182 if len(iri
.split(None, 1)) == 1:
185 return iri_to_uri(iri
)
188 def _decode_idna(domain
: str) -> str:
190 data
= domain
.encode("ascii")
191 except UnicodeEncodeError:
192 # If the domain is not ASCII, it's decoded already.
196 # Try decoding in one shot.
197 return data
.decode("idna")
198 except UnicodeDecodeError:
201 # Decode each part separately, leaving invalid parts as punycode.
204 for part
in data
.split(b
"."):
206 parts
.append(part
.decode("idna"))
207 except UnicodeDecodeError:
208 parts
.append(part
.decode("ascii"))
210 return ".".join(parts
)
def _urlencode(query: t.Mapping[str, str] | t.Iterable[tuple[str, str]]) -> str:
    """URL-encode the key/value pairs of ``query``, skipping ``None`` values.

    :param query: A mapping or an iterable of ``(key, value)`` pairs; it is
        expanded with ``iter_multi_items``.
    :return: The encoded query string.
    """
    items = [x for x in iter_multi_items(query) if x[1] is not None]
    # safe = https://url.spec.whatwg.org/#percent-encoded-bytes
    return urlencode(items, safe="!$'()*,/:;?@")