]> jfr.im git - dlqueue.git/blob - venv/lib/python3.11/site-packages/werkzeug/urls.py
init: venv and flask
[dlqueue.git] / venv / lib / python3.11 / site-packages / werkzeug / urls.py
1 from __future__ import annotations
2
3 import codecs
4 import re
5 import typing as t
6 from urllib.parse import quote
7 from urllib.parse import unquote
8 from urllib.parse import urlencode
9 from urllib.parse import urlsplit
10 from urllib.parse import urlunsplit
11
12 from .datastructures import iter_multi_items
13
14
def _codec_error_url_quote(e: UnicodeError) -> tuple[str, int]:
    """Codec error handler that percent-encodes the bytes that failed.

    Used in :func:`uri_to_iri` after unquoting to re-quote any invalid
    bytes.

    :param e: The error raised by the codec; its ``object``, ``start`` and
        ``end`` attributes identify the offending slice.
    :return: The percent-encoded replacement text and the position at
        which processing should resume (``e.end``).
    """
    # The docs state that UnicodeError carries these attributes, but mypy
    # does not pick them up.
    resume_at = e.end  # type: ignore
    offending = e.object[e.start : resume_at]  # type: ignore
    # ``safe=""`` so that every byte of the slice is quoted.
    return quote(offending, safe=""), resume_at
23
24
# Register the handler under the name that ``_make_unquote_part`` passes as
# the ``errors`` argument to ``unquote``.
codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)
26
27
def _make_unquote_part(name: str, chars: str) -> t.Callable[[str], str]:
    """Build an unquoting function that leaves selected characters quoted.

    The returned function unquotes every percent-encoded sequence in its
    argument except those that encode one of ``chars``. This allows working
    with unquoted characters where possible without changing the meaning of
    a given part of a URL.

    :param name: Suffix for the returned function's ``__name__``
        (``_unquote_<name>``).
    :param chars: Characters whose percent-encoded form must be preserved.
    :return: A function mapping a quoted string to its partially unquoted
        form.
    """
    hex_codes = "|".join(f"{ord(c):02X}" for c in sorted(chars))
    # One capturing group: ``split`` then alternates free text (even
    # indexes) with runs of protected escapes (odd indexes).
    protected = re.compile(f"((?:%(?:{hex_codes}))+)", re.I)

    def _unquote_partial(value: str) -> str:
        pieces = protected.split(value)
        # Only the free-text pieces are unquoted; protected runs stay as is.
        pieces[::2] = [
            unquote(text, "utf-8", "werkzeug.url_quote") for text in pieces[::2]
        ]
        return "".join(pieces)

    _unquote_partial.__name__ = f"_unquote_{name}"
    return _unquote_partial
48
49
# Characters that should remain quoted in each part of a URL, based on
# https://url.spec.whatwg.org/#percent-encoded-bytes
# Controls (0x00-0x1F), space (0x20), "%" (0x25) and DEL (0x7F) always stay
# quoted; each part adds its own delimiters on top of that.
_always_unsafe = "".join(map(chr, (*range(0x21), 0x25, 0x7F)))
_unquote_fragment = _make_unquote_part("fragment", _always_unsafe)
_unquote_query = _make_unquote_part("query", f"{_always_unsafe}&=+#")
_unquote_path = _make_unquote_part("path", f"{_always_unsafe}/?#")
_unquote_user = _make_unquote_part("user", f"{_always_unsafe}:@/?#")
58
59
def uri_to_iri(uri: str) -> str:
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    An explicit port of ``0`` and a password given with an empty user name
    (``//:secret@host``) are kept in the result.

    :param uri: The URI to convert.
    :return: The IRI, reassembled with :func:`urllib.parse.urlunsplit`.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters,
        are removed.

    .. versionchanged:: 2.3
        Which characters remain quoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved and invalid characters remain quoted. Previously,
        only some reserved characters were preserved, and invalid bytes
        were replaced instead of left quoted.

    .. versionadded:: 0.6
    """
    parts = urlsplit(uri)
    path = _unquote_path(parts.path)
    query = _unquote_query(parts.query)
    fragment = _unquote_fragment(parts.fragment)

    if parts.hostname:
        netloc = _decode_idna(parts.hostname)
    else:
        netloc = ""

    # ``hostname`` has the brackets of an IPv6 literal stripped; restore them.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    # ``port`` is ``None`` when absent. A truthiness test would also drop an
    # explicit port 0 and silently change the URL.
    if parts.port is not None:
        netloc = f"{netloc}:{parts.port}"

    # Check the password as well: with an empty user name (``//:secret@host``)
    # testing only the user name would discard the credentials.
    if parts.username or parts.password:
        auth = _unquote_user(parts.username or "")

        if parts.password:
            password = _unquote_user(parts.password)
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))
110
111
def iri_to_uri(iri: str) -> str:
    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
    quoted. If the URL has a domain, it is encoded to Punycode.

    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'

    An explicit port of ``0`` and a password given with an empty user name
    (``//:secret@host``) are kept in the result.

    :param iri: The IRI to convert.
    :return: The URI, reassembled with :func:`urllib.parse.urlunsplit`.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, the ``charset`` and ``errors`` parameters,
        and the ``safe_conversion`` parameter, are removed.

    .. versionchanged:: 2.3
        Which characters remain unquoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved characters remain unquoted. Previously, only some reserved
        characters were left unquoted.

    .. versionchanged:: 0.9.6
        The ``safe_conversion`` parameter was added.

    .. versionadded:: 0.6
    """
    parts = urlsplit(iri)
    # safe = https://url.spec.whatwg.org/#url-path-segment-string
    # as well as percent for things that are already quoted
    path = quote(parts.path, safe="%!$&'()*+,/:;=@")
    query = quote(parts.query, safe="%!$&'()*+,/:;=?@")
    fragment = quote(parts.fragment, safe="%!#$&'()*+,/:;=?@")

    if parts.hostname:
        netloc = parts.hostname.encode("idna").decode("ascii")
    else:
        netloc = ""

    # ``hostname`` has the brackets of an IPv6 literal stripped; restore them.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    # ``port`` is ``None`` when absent. A truthiness test would also drop an
    # explicit port 0 and silently change the URL.
    if parts.port is not None:
        netloc = f"{netloc}:{parts.port}"

    # Check the password as well: with an empty user name (``//:secret@host``)
    # testing only the user name would discard the credentials.
    if parts.username or parts.password:
        userinfo_safe = "%!$&'()*+,;="
        auth = quote(parts.username or "", safe=userinfo_safe)

        if parts.password:
            password = quote(parts.password, safe=userinfo_safe)
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))
165
166
def _invalid_iri_to_uri(iri: str) -> str:
    """Pass an ASCII, whitespace-free IRI through untouched; otherwise
    convert it with :func:`iri_to_uri`.

    The URL scheme ``itms-services://`` must contain the ``//`` even though it does
    not have a host component. There may be other invalid schemes as well. Currently,
    responses will always call ``iri_to_uri`` on the redirect ``Location`` header, which
    removes the ``//``. For now, if the IRI only contains ASCII and does not contain
    spaces, pass it on as-is. In Werkzeug 3.0, this should become a
    ``response.process_location`` flag.

    :meta private:
    """
    # Exactly one whitespace-separated word means there is nothing that
    # needs quoting between the first and last visible characters.
    if iri.isascii() and len(iri.split(maxsplit=1)) == 1:
        return iri

    return iri_to_uri(iri)
186
187
def _decode_idna(domain: str) -> str:
    """Decode an IDNA (Punycode) domain to Unicode, label by label if needed.

    :param domain: The host name to decode.
    :return: ``domain`` unchanged if it contains non-ASCII characters;
        otherwise the decoded name, with any label that cannot be decoded
        left in its ASCII form.
    """
    try:
        data = domain.encode("ascii")
    except UnicodeEncodeError:
        # If the domain is not ASCII, it's decoded already.
        return domain

    try:
        # Try decoding in one shot.
        return data.decode("idna")
    except UnicodeError:
        # Catch the base class: the idna/punycode codecs can raise a plain
        # ``UnicodeError`` (e.g. "incomplete punicode string" on CPython
        # 3.11) that ``except UnicodeDecodeError`` would let escape.
        pass

    # Decode each part separately, leaving invalid parts as punycode.
    parts = []

    for part in data.split(b"."):
        try:
            parts.append(part.decode("idna"))
        except UnicodeError:
            parts.append(part.decode("ascii"))

    return ".".join(parts)
211
212
def _urlencode(query: t.Mapping[str, str] | t.Iterable[tuple[str, str]]) -> str:
    """Encode ``query`` as a URL query string, skipping ``None`` values.

    :param query: Key/value pairs, flattened with ``iter_multi_items``.
    :return: The string produced by :func:`urllib.parse.urlencode` for the
        remaining pairs.
    """
    present = list(
        filter(lambda pair: pair[1] is not None, iter_multi_items(query))
    )
    # safe = https://url.spec.whatwg.org/#percent-encoded-bytes
    return urlencode(present, safe="!$'()*,/:;?@")