from __future__ import annotations
import functools
-import gzip
import http.client
import io
import socket
make_socks_proxy_opts,
select_proxy,
)
-from .common import Features, RequestHandler, Response, register
+from .common import Features, RequestHandler, Response, register_rh
from .exceptions import (
CertificateVerifyError,
HTTPError,
from ..dependencies import brotli
from ..socks import ProxyError as SocksProxyError
from ..socks import sockssocket
-from ..utils import escape_url, update_url_query
+from ..utils import update_url_query
+from ..utils.networking import normalize_url
SUPPORTED_ENCODINGS = ['gzip', 'deflate']
CONTENT_DECODE_ERRORS = [zlib.error, OSError]
@staticmethod
def gz(data):
- gz = gzip.GzipFile(fileobj=io.BytesIO(data), mode='rb')
- try:
- return gz.read()
- except OSError as original_oserror:
- # There may be junk add the end of the file
- # See http://stackoverflow.com/q/4928560/35070 for details
- for i in range(1, 1024):
- try:
- gz = gzip.GzipFile(fileobj=io.BytesIO(data[:-i]), mode='rb')
- return gz.read()
- except OSError:
- continue
- else:
- raise original_oserror
+ # There may be junk added at the end of the file
+ # We ignore it by only ever decoding a single gzip payload
+ if not data:
+ return data
+ return zlib.decompress(data, wbits=zlib.MAX_WBITS | 16)
def http_request(self, req):
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
# Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
# the code of this workaround has been moved here from YoutubeDL.urlopen()
url = req.get_full_url()
- url_escaped = escape_url(url)
+ url_escaped = normalize_url(url)
# Substitute URL if any change after escaping
if url != url_escaped:
if location:
# As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
location = location.encode('iso-8859-1').decode()
- location_escaped = escape_url(location)
+ location_escaped = normalize_url(location)
if location != location_escaped:
del resp.headers['Location']
resp.headers['Location'] = location_escaped
def update_Request(req, url=None, data=None, headers=None, query=None):
req_headers = req.headers.copy()
req_headers.update(headers or {})
- req_data = data or req.data
+ req_data = data if data is not None else req.data
req_url = update_url_query(url or req.get_full_url(), query)
req_get_method = req.get_method()
if req_get_method == 'HEAD':
raise TransportError(cause=e) from e
-@register
+@register_rh
class UrllibRH(RequestHandler, InstanceStoreMixin):
_SUPPORTED_URL_SCHEMES = ('http', 'https', 'data', 'ftp')
_SUPPORTED_PROXY_SCHEMES = ('http', 'socks4', 'socks4a', 'socks5', 'socks5h')
if self.enable_file_urls:
self._SUPPORTED_URL_SCHEMES = (*self._SUPPORTED_URL_SCHEMES, 'file')
+ def _check_extensions(self, extensions):
+ super()._check_extensions(extensions)
+ extensions.pop('cookiejar', None)
+ extensions.pop('timeout', None)
+
def _create_instance(self, proxies, cookiejar):
opener = urllib.request.OpenerDirector()
handlers = [
except urllib.error.HTTPError as e:
if isinstance(e.fp, (http.client.HTTPResponse, urllib.response.addinfourl)):
# Prevent file object from being closed when urllib.error.HTTPError is destroyed.
- e._closer.file = None
+ e._closer.close_called = True
raise HTTPError(UrllibResponseAdapter(e.fp), redirect_loop='redirect error' in str(e)) from e
raise # unexpected
except urllib.error.URLError as e: