]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/utils/networking.py
[ie/brightcove] Upgrade requests to HTTPS (#10202)
[yt-dlp.git] / yt_dlp / utils / networking.py
index e6515ec8eecfe8d76035a500b25f7728ec2b1224..933b164be95b06f93e9c1def65574e243106a346 100644 (file)
@@ -65,7 +65,9 @@ def __init__(self, *args, **kwargs):
         self.update(kwargs)
 
     def __setitem__(self, key, value):
-        super().__setitem__(key.title(), str(value))
+        if isinstance(value, bytes):
+            value = value.decode('latin-1')
+        super().__setitem__(key.title(), str(value).strip())
 
     def __getitem__(self, key):
         return super().__getitem__(key.title())
@@ -110,7 +112,7 @@ def clean_proxies(proxies: dict, headers: HTTPHeaderDict):
 
             replace_scheme = {
                 'socks5': 'socks5h',  # compat: socks5 was treated as socks5h
-                'socks': 'socks4'  # compat: non-standard
+                'socks': 'socks4',  # compat: non-standard
             }
             if proxy_scheme in replace_scheme:
                 proxies[proxy_key] = urllib.parse.urlunparse(
@@ -121,3 +123,42 @@ def clean_headers(headers: HTTPHeaderDict):
     if 'Youtubedl-No-Compression' in headers:  # compat
         del headers['Youtubedl-No-Compression']
         headers['Accept-Encoding'] = 'identity'
+    headers.pop('Ytdl-socks-proxy', None)
+
+
+def remove_dot_segments(path):
+    # Implements RFC3986 5.2.4 remove_dot_segments
+    # Pseudo-code: https://tools.ietf.org/html/rfc3986#section-5.2.4
+    # https://github.com/urllib3/urllib3/blob/ba49f5c4e19e6bca6827282feb77a3c9f937e64b/src/urllib3/util/url.py#L263
+    output = []
+    segments = path.split('/')
+    for s in segments:
+        if s == '.':
+            continue
+        elif s == '..':
+            if output:
+                output.pop()
+        else:
+            output.append(s)
+    if not segments[0] and (not output or output[0]):
+        output.insert(0, '')
+    if segments[-1] in ('.', '..'):
+        output.append('')
+    return '/'.join(output)
+
+
+def escape_rfc3986(s):
+    """Escape non-ASCII characters as suggested by RFC 3986"""
+    return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
+
+
+def normalize_url(url):
+    """Normalize URL as suggested by RFC 3986"""
+    url_parsed = urllib.parse.urlparse(url)
+    return url_parsed._replace(
+        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
+        path=escape_rfc3986(remove_dot_segments(url_parsed.path)),
+        params=escape_rfc3986(url_parsed.params),
+        query=escape_rfc3986(url_parsed.query),
+        fragment=escape_rfc3986(url_parsed.fragment),
+    ).geturl()