]> jfr.im git - yt-dlp.git/blob - yt_dlp/networking/_curlcffi.py
[networking] Add `extensions` attribute to `Response` (#9756)
[yt-dlp.git] / yt_dlp / networking / _curlcffi.py
1 from __future__ import annotations
2
3 import io
4 import math
5 import urllib.parse
6
7 from ._helper import InstanceStoreMixin, select_proxy
8 from .common import (
9 Features,
10 Request,
11 Response,
12 register_preference,
13 register_rh,
14 )
15 from .exceptions import (
16 CertificateVerifyError,
17 HTTPError,
18 IncompleteRead,
19 ProxyError,
20 SSLError,
21 TransportError,
22 )
23 from .impersonate import ImpersonateRequestHandler, ImpersonateTarget
24 from ..dependencies import curl_cffi
25 from ..utils import int_or_none
26
# This module is unusable without the optional curl_cffi dependency,
# so fail at import time rather than on first use.
if curl_cffi is None:
    raise ImportError('curl_cffi is not installed')

# Dotted version string converted component-by-component via int_or_none
# (presumably non-numeric components fall back to 0 - confirm against int_or_none).
curl_cffi_version = tuple(
    int_or_none(component, default=0)
    for component in curl_cffi.__version__.split('.')
)

if curl_cffi_version != (0, 5, 10):
    # Tag the module with a human-readable marker before refusing to load
    curl_cffi._yt_dlp__version = f'{curl_cffi.__version__} (unsupported)'
    raise ImportError('Only curl_cffi 0.5.10 is supported')
35
36 import curl_cffi.requests
37 from curl_cffi.const import CurlECode, CurlOpt
38
39
class CurlCFFIResponseReader(io.IOBase):
    """Read-only, file-like view over a streamed curl_cffi response.

    Data is pulled lazily from ``response.iter_content()`` and held in an
    internal buffer until it is handed out by :meth:`read`.

    ``bytes_read`` counts every byte pulled from the underlying response so
    far, including bytes that are still buffered and not yet returned.
    """

    def __init__(self, response: curl_cffi.requests.Response):
        self._response = response
        self._iterator = response.iter_content()
        self._buffer = b''
        self.bytes_read = 0

    def readable(self):
        return True

    def read(self, size=None):
        """Return up to ``size`` bytes from the response.

        ``size=None`` or a negative ``size`` reads until the response is
        exhausted, following the usual ``io`` convention for ``read(-1)``.
        Fewer than ``size`` bytes are returned only when the response ends.

        The underlying response is closed as soon as everything has been
        read and handed out, and also whenever reading raises; the
        exception is then propagated unchanged.
        """
        # io-style "read everything" request; without this normalisation a
        # negative size would skip the fill loop and slice the buffer wrongly.
        if size is not None and size < 0:
            size = None

        try:
            # Gather chunks in a list and join once, instead of repeatedly
            # concatenating immutable bytes objects.
            pending = [self._buffer]
            buffered = len(self._buffer)
            while self._iterator and (size is None or buffered < size):
                chunk = next(self._iterator, None)
                if chunk is None:
                    self._iterator = None
                    break
                pending.append(chunk)
                buffered += len(chunk)
                self.bytes_read += len(chunk)
            self._buffer = b''.join(pending)

            if size is None:
                size = len(self._buffer)
            data = self._buffer[:size]
            self._buffer = self._buffer[size:]

            # "free" the curl instance if the response is fully read.
            # curl_cffi doesn't do this automatically and only allows one open response per thread
            if not self._iterator and not self._buffer:
                self.close()
            return data
        except BaseException:
            # Never leave the curl instance occupied after a failed read
            self.close()
            raise

    def close(self):
        """Close the underlying response and discard any buffered data."""
        if not self.closed:
            self._response.close()
            self._buffer = b''
            super().close()
81
82
class CurlCFFIResponseAdapter(Response):
    """Response whose body is served by a CurlCFFIResponseReader wrapping a curl_cffi response."""
    fp: CurlCFFIResponseReader

    def __init__(self, response: curl_cffi.requests.Response):
        reader = CurlCFFIResponseReader(response)
        super().__init__(
            fp=reader,
            headers=response.headers,
            url=response.url,
            status=response.status_code,
        )

    def read(self, amt=None):
        """Read from the wrapped reader, translating curl_cffi request errors.

        A ``PARTIAL_FILE`` error becomes ``IncompleteRead``; any other
        curl_cffi ``RequestsError`` becomes ``TransportError``.
        """
        try:
            return self.fp.read(amt)
        except curl_cffi.requests.errors.RequestsError as error:
            if error.code != CurlECode.PARTIAL_FILE:
                raise TransportError(cause=error) from error

            # The number of bytes still missing is only known when the
            # server announced a Content-Length
            announced = int_or_none(error.response.headers.get('Content-Length'))
            if announced is None:
                missing = None
            else:
                missing = announced - self.fp.bytes_read
            raise IncompleteRead(
                partial=self.fp.bytes_read,
                expected=missing,
                cause=error) from error
104
105
@register_rh
class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
    """Request handler that performs HTTP(S) requests through curl_cffi sessions."""
    RH_NAME = 'curl_cffi'
    _SUPPORTED_URL_SCHEMES = ('http', 'https')
    _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
    _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
    # Each supported target mapped to the curl_cffi BrowserType passed as `impersonate`
    _SUPPORTED_IMPERSONATE_TARGET_MAP = {
        ImpersonateTarget('chrome', '110', 'windows', '10'): curl_cffi.requests.BrowserType.chrome110,
        ImpersonateTarget('chrome', '107', 'windows', '10'): curl_cffi.requests.BrowserType.chrome107,
        ImpersonateTarget('chrome', '104', 'windows', '10'): curl_cffi.requests.BrowserType.chrome104,
        ImpersonateTarget('chrome', '101', 'windows', '10'): curl_cffi.requests.BrowserType.chrome101,
        ImpersonateTarget('chrome', '100', 'windows', '10'): curl_cffi.requests.BrowserType.chrome100,
        ImpersonateTarget('chrome', '99', 'windows', '10'): curl_cffi.requests.BrowserType.chrome99,
        ImpersonateTarget('edge', '101', 'windows', '10'): curl_cffi.requests.BrowserType.edge101,
        ImpersonateTarget('edge', '99', 'windows', '10'): curl_cffi.requests.BrowserType.edge99,
        ImpersonateTarget('safari', '15.5', 'macos', '12'): curl_cffi.requests.BrowserType.safari15_5,
        ImpersonateTarget('safari', '15.3', 'macos', '11'): curl_cffi.requests.BrowserType.safari15_3,
        ImpersonateTarget('chrome', '99', 'android', '12'): curl_cffi.requests.BrowserType.chrome99_android,
    }

    def _create_instance(self, cookiejar=None):
        """Build a new curl_cffi session using the given cookiejar."""
        return curl_cffi.requests.Session(cookies=cookiejar)

    def _check_extensions(self, extensions):
        """Run the parent check, then drop the extensions this handler accepts."""
        super()._check_extensions(extensions)
        for accepted in ('impersonate', 'cookiejar', 'timeout'):
            extensions.pop(accepted, None)

    def send(self, request: Request) -> Response:
        """Send the request and record the impersonate target on the response.

        The target is stored under ``extensions['impersonate']`` both on a
        returned response and on the response carried by a raised HTTPError.
        """
        target = self._get_request_target(request)
        try:
            response = super().send(request)
        except HTTPError as error:
            error.response.extensions['impersonate'] = target
            raise
        else:
            response.extensions['impersonate'] = target
            return response

    def _send(self, request: Request):
        """Configure a curl_cffi session for the request, perform it and wrap the result.

        Raises CertificateVerifyError, SSLError, ProxyError or TransportError
        for the corresponding curl error codes, and HTTPError when the final
        status is outside the 2xx range.
        """
        redirect_loop = False

        # The cookiejar is only handed to the session when the request
        # carries no explicit cookie header
        if 'cookie' in request.headers:
            cookiejar = None
        else:
            cookiejar = self._get_cookiejar(request)
        session: curl_cffi.requests.Session = self._get_instance(cookiejar=cookiejar)

        if self.verbose:
            session.curl.setopt(CurlOpt.VERBOSE, 1)

        proxies = self._get_proxies(request)
        if 'no' in proxies:
            session.curl.setopt(CurlOpt.NOPROXY, proxies['no'])
            del proxies['no']

        # curl doesn't support per protocol proxies, so we select the one that matches the request protocol
        proxy = select_proxy(request.url, proxies=proxies)
        if proxy:
            session.curl.setopt(CurlOpt.PROXY, proxy)
            if urllib.parse.urlparse(request.url).scheme.lower() != 'http':
                # Enable HTTP CONNECT for HTTPS urls.
                # Don't use CONNECT for http for compatibility with urllib behaviour.
                # See: https://curl.se/libcurl/c/CURLOPT_HTTPPROXYTUNNEL.html
                session.curl.setopt(CurlOpt.HTTPPROXYTUNNEL, 1)

        headers = self._get_impersonate_headers(request)

        if self._client_cert:
            session.curl.setopt(CurlOpt.SSLCERT, self._client_cert['client_certificate'])
            # Key and key password are optional; only forward the ones that are set
            optional_cert_options = (
                (CurlOpt.SSLKEY, 'client_certificate_key'),
                (CurlOpt.KEYPASSWD, 'client_certificate_password'),
            )
            for curl_option, cert_field in optional_cert_options:
                cert_value = self._client_cert.get(cert_field)
                if cert_value:
                    session.curl.setopt(curl_option, cert_value)

        timeout = self._calculate_timeout(request)

        # set CURLOPT_LOW_SPEED_LIMIT and CURLOPT_LOW_SPEED_TIME to act as a read timeout. [1]
        # curl_cffi does not currently do this. [2]
        # Note: CURLOPT_LOW_SPEED_TIME is in seconds, so we need to round up to the nearest second. [3]
        # [1] https://unix.stackexchange.com/a/305311
        # [2] https://github.com/yifeikong/curl_cffi/issues/156
        # [3] https://curl.se/libcurl/c/CURLOPT_LOW_SPEED_TIME.html
        session.curl.setopt(CurlOpt.LOW_SPEED_LIMIT, 1)  # 1 byte per second
        session.curl.setopt(CurlOpt.LOW_SPEED_TIME, math.ceil(timeout))

        try:
            curl_response = session.request(
                method=request.method,
                url=request.url,
                headers=headers,
                data=request.data,
                verify=self.verify,
                max_redirects=5,
                timeout=timeout,
                impersonate=self._SUPPORTED_IMPERSONATE_TARGET_MAP.get(
                    self._get_request_target(request)),
                interface=self.source_address,
                stream=True,
            )
        except curl_cffi.requests.errors.RequestsError as error:
            if error.code == CurlECode.TOO_MANY_REDIRECTS:
                # Not fatal here: keep the response attached to the error so
                # the status check below can report it as a redirect loop
                redirect_loop = True
                curl_response = error.response
            elif error.code == CurlECode.PEER_FAILED_VERIFICATION:
                raise CertificateVerifyError(cause=error) from error
            elif error.code == CurlECode.SSL_CONNECT_ERROR:
                raise SSLError(cause=error) from error
            elif error.code == CurlECode.PROXY:
                raise ProxyError(cause=error) from error
            else:
                raise TransportError(cause=error) from error

        response = CurlCFFIResponseAdapter(curl_response)

        if 200 <= response.status < 300:
            return response
        raise HTTPError(response, redirect_loop=redirect_loop)
227
228
@register_preference(CurlCFFIRH)
def curl_cffi_preference(rh, request):
    """Return the fixed preference value (-100) registered for CurlCFFIRH.

    The value does not depend on ``rh`` or ``request``.
    NOTE(review): presumably a negative value ranks this handler below
    others - confirm against register_preference.
    """
    return -100