jfr.im git - yt-dlp.git/blob - test/test_networking.py
[networking] Remove `_CompatHTTPError` (#8871)
[yt-dlp.git] / test / test_networking.py
1 #!/usr/bin/env python3
2
3 # Allow direct execution
4 import os
5 import sys
6
7 import pytest
8
9 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
11 import gzip
12 import http.client
13 import http.cookiejar
14 import http.server
15 import io
16 import pathlib
17 import random
18 import ssl
19 import tempfile
20 import threading
21 import time
22 import urllib.error
23 import urllib.request
24 import warnings
25 import zlib
26 from email.message import Message
27 from http.cookiejar import CookieJar
28
29 from test.helper import FakeYDL, http_server_port, verify_address_availability
30 from yt_dlp.cookies import YoutubeDLCookieJar
31 from yt_dlp.dependencies import brotli, requests, urllib3
32 from yt_dlp.networking import (
33 HEADRequest,
34 PUTRequest,
35 Request,
36 RequestDirector,
37 RequestHandler,
38 Response,
39 )
40 from yt_dlp.networking._urllib import UrllibRH
41 from yt_dlp.networking.exceptions import (
42 CertificateVerifyError,
43 HTTPError,
44 IncompleteRead,
45 NoSupportingHandlers,
46 ProxyError,
47 RequestError,
48 SSLError,
49 TransportError,
50 UnsupportedRequest,
51 )
52 from yt_dlp.utils._utils import _YDLLogger as FakeLogger
53 from yt_dlp.utils.networking import HTTPHeaderDict
54
55 from test.conftest import validate_and_send
56
# Absolute path of the directory holding this test module; fixture files
# (e.g. testcert.pem, testdata/certificate) are located relative to it.
TEST_DIR = os.path.split(os.path.abspath(__file__))[0]
58
59
60 def _build_proxy_handler(name):
61 class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
62 proxy_name = name
63
64 def log_message(self, format, *args):
65 pass
66
67 def do_GET(self):
68 self.send_response(200)
69 self.send_header('Content-Type', 'text/plain; charset=utf-8')
70 self.end_headers()
71 self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode())
72 return HTTPTestRequestHandler
73
74
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    """
    Request handler for the local HTTP(S) servers the networking tests talk to.

    Behaviour is selected by the request path (see the do_* methods): fixed
    HTML/video payloads, redirects with a chosen status code, echoing of the
    request method and headers, compressed bodies, arbitrary status codes,
    truncated responses, delayed responses and the client's source address.
    Any unknown path is answered with a 404.
    """
    protocol_version = 'HTTP/1.1'

    # HTML body shared by most of the page-like endpoints
    _VIDEO_HTML = b'<html><video src="/vid.mp4" /></html>'
    _HTML_TYPE = 'text/html; charset=utf-8'

    def log_message(self, format, *args):
        # Suppress the default per-request logging
        pass

    @staticmethod
    def _gzip(data):
        """Return `data` compressed as a gzip stream"""
        buf = io.BytesIO()
        with gzip.GzipFile(fileobj=buf, mode='wb') as f:
            f.write(data)
        return buf.getvalue()

    def _send_body(self, payload, content_type, status=200):
        """Send a complete response: status line, Content-Type, Content-Length and `payload`"""
        self.send_response(status)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _send_redirect(self, status, location):
        """Send a body-less response with the given status code and Location header"""
        self.send_response(status)
        self.send_header('Location', location)
        self.send_header('Content-Length', '0')
        self.end_headers()

    def _headers(self):
        """Echo the request headers back as the response body"""
        self._send_body(str(self.headers).encode(), 'application/json')

    def _redirect(self):
        """Redirect to /method using the status code embedded in the path (/redirect_<code>)"""
        self._send_redirect(int(self.path[len('/redirect_'):]), '/method')

    def _method(self, method, payload=None):
        """Report the handled request method in a `Method` response header, with an optional body"""
        self.send_response(200)
        self.send_header('Content-Length', str(len(payload or '')))
        self.send_header('Method', method)
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def _status(self, status):
        """Respond with the given status code and a small HTML body mentioning it"""
        payload = f'<html>{status} NOT FOUND</html>'.encode()
        self._send_body(payload, self._HTML_TYPE, status=int(status))

    def _read_data(self):
        """
        Read the request body as announced by the Content-Length header.

        @returns    The body bytes, or b'' when no Content-Length header was sent.
                    (Always bytes, so callers can safely concatenate the result.)
        """
        if 'Content-Length' in self.headers:
            return self.rfile.read(int(self.headers['Content-Length']))
        return b''

    def do_POST(self):
        # Response body for /method is the request body followed by the request headers
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('POST', data)
        elif self.path.startswith('/headers'):
            self._headers()
        else:
            self._status(404)

    def do_HEAD(self):
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('HEAD')
        else:
            self._status(404)

    def do_PUT(self):
        # Response body for /method is the request body followed by the request headers
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('PUT', data)
        else:
            self._status(404)

    def do_GET(self):
        path = self.path
        if path in ('/video.html', '/%E4%B8%AD%E6%96%87.html', '/%c7%9f'):
            self._send_body(self._VIDEO_HTML, self._HTML_TYPE)
        elif path == '/vid.mp4':
            self._send_body(b'\x00\x00\x00\x00\x20\x66\x74[video]', 'video/mp4')
        elif path.startswith('/redirect_loop'):
            # Redirect to itself indefinitely
            self._send_redirect(301, path)
        elif path == '/redirect_dotsegments':
            # redirect to /headers but with dot segments before
            self._send_redirect(301, '/a/b/./../../headers')
        elif path.startswith('/redirect_'):
            self._redirect()
        elif path.startswith('/method'):
            self._method('GET', str(self.headers).encode())
        elif path.startswith('/headers'):
            self._headers()
        elif path.startswith('/308-to-headers'):
            self._send_redirect(308, '/headers')
        elif path == '/trailing_garbage':
            # A valid gzip stream followed by bytes that are not part of it
            compressed = self._gzip(self._VIDEO_HTML) + b'trailing garbage'
            self.send_response(200)
            self.send_header('Content-Type', self._HTML_TYPE)
            self.send_header('Content-Encoding', 'gzip')
            self.send_header('Content-Length', str(len(compressed)))
            self.end_headers()
            self.wfile.write(compressed)
        elif path == '/302-non-ascii-redirect':
            # Location is sent with raw (not percent-encoded) non-ASCII characters.
            # NOTE: despite the path name, the status code sent is 301
            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
            self._send_redirect(301, new_url)
        elif path == '/content-encoding':
            # The encodings to apply (in order) are requested via the `ytdl-encoding` header
            encodings = self.headers.get('ytdl-encoding', '')
            payload = self._VIDEO_HTML
            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
                if encoding == 'br' and brotli:
                    payload = brotli.compress(payload)
                elif encoding == 'gzip':
                    payload = self._gzip(payload)
                elif encoding == 'deflate':
                    payload = zlib.compress(payload)
                elif encoding == 'unsupported':
                    # Body is sent as-is while still announcing the unknown encoding
                    payload = b'raw'
                    break
                else:
                    self._status(415)
                    return
            self.send_response(200)
            self.send_header('Content-Encoding', encodings)
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)
        elif path.startswith('/gen_'):
            # Respond with the status code embedded in the path (/gen_<code>)
            self._send_body(b'<html></html>', self._HTML_TYPE, status=int(path[len('/gen_'):]))
        elif path.startswith('/incompleteread'):
            # Announce far more data than is actually sent, then end the response
            payload = b'<html></html>'
            self.send_response(200)
            self.send_header('Content-Type', self._HTML_TYPE)
            self.send_header('Content-Length', '234234')
            self.end_headers()
            self.wfile.write(payload)
            self.finish()
        elif path.startswith('/timeout_'):
            # Wait for the number of seconds embedded in the path before answering
            time.sleep(int(path[len('/timeout_'):]))
            self._headers()
        elif path == '/source_address':
            # Report the address the client connected from
            self._send_body(str(self.client_address[0]).encode(), self._HTML_TYPE)
            self.finish()
        else:
            self._status(404)

    def send_header(self, keyword, value):
        """
        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
        This is against what is defined in RFC 3986, however we need to test we support this
        since some sites incorrectly do this.
        """
        if keyword.lower() == 'connection':
            return super().send_header(keyword, value)

        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []

        # str.encode() defaults to UTF-8, so non-ASCII header values are accepted here
        self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
278
279
class TestRequestHandlerBase:
    """
    Base class for test classes that need the local test servers.

    For each test class, `setup_class` starts one plain HTTP server and one HTTPS server
    (both serving `HTTPTestRequestHandler` on an ephemeral 127.0.0.1 port) and exposes
    their ports as `http_port` and `https_port`. `teardown_class` stops them again.
    """

    @classmethod
    def setup_class(cls):
        cls.http_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        cls.http_port = http_server_port(cls.http_httpd)
        cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever)
        # FIXME: we should probably stop the http server thread after each test
        # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
        cls.http_server_thread.daemon = True
        cls.http_server_thread.start()

        # HTTPS server
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        cls.https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
        cls.https_port = http_server_port(cls.https_httpd)
        cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever)
        cls.https_server_thread.daemon = True
        cls.https_server_thread.start()

    @classmethod
    def teardown_class(cls):
        # Stop the serve_forever() loops and release the listening sockets once
        # all tests of the class have run, instead of leaking them until exit
        for attr in ('http_httpd', 'https_httpd'):
            httpd = getattr(cls, attr, None)
            if httpd is None:
                continue
            httpd.shutdown()
            httpd.server_close()
303
304
class TestHTTPRequestHandler(TestRequestHandlerBase):
    """
    HTTP behaviour expected from every request handler, checked against the local test servers.

    Each test gets `handler` via indirect parametrization (the fixture itself is defined
    outside this file); calling it with handler options yields a context manager that
    provides the request handler under test.
    """

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_verify_cert(self, handler):
        with handler() as rh:
            with pytest.raises(CertificateVerifyError):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

        with handler(verify=False) as rh:
            r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
            assert r.status == 200
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_ssl_error(self, handler):
        # HTTPS server that cannot complete a handshake: no certificate is loaded into its context
        # NOTE(review): this was previously described as a server with a too old TLS version - confirm intent
        # XXX: is there a better way to test this than to create a new server?
        https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
        https_port = http_server_port(https_httpd)
        https_server_thread = threading.Thread(target=https_httpd.serve_forever)
        https_server_thread.daemon = True
        https_server_thread.start()

        try:
            with handler(verify=False) as rh:
                with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
                    validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
                assert not issubclass(exc_info.type, CertificateVerifyError)
        finally:
            # Do not leave the one-off server running for the rest of the test session
            https_httpd.shutdown()
            https_httpd.server_close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_percent_encode(self, handler):
        with handler() as rh:
            # Unicode characters should be encoded with uppercase percent-encoding
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
            assert res.status == 200
            res.close()
            # don't normalize existing percent encodings
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
            assert res.status == 200
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_remove_dot_segments(self, handler):
        with handler() as rh:
            # This isn't a comprehensive test,
            # but it should be enough to check whether the handler is removing dot segments
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
            assert res.status == 200
            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
            res.close()

            # Dot segments in a redirect Location must be removed as well
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
            assert res.status == 200
            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_unicode_path_redirection(self, handler):
        with handler() as rh:
            # The server sends a raw non-ASCII Location; the final URL must be percent-encoded
            r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
            assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_raise_http_error(self, handler):
        with handler() as rh:
            for bad_status in (400, 500, 599, 302):
                with pytest.raises(HTTPError):
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_{bad_status}'))

            # Should not raise an error
            validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')).close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_response_url(self, handler):
        with handler() as rh:
            # Response url should be that of the last url in redirect chain
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
            assert res.url == f'http://127.0.0.1:{self.http_port}/method'
            res.close()
            res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
            assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
            res2.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_redirect(self, handler):
        with handler() as rh:
            def do_req(redirect_status, method, assert_no_content=False):
                """
                Request /redirect_<redirect_status> with `method` and follow the redirect to /method.

                Returns a tuple of (request body echoed back by the server, value of the
                `Method` response header), i.e. what was actually sent after the redirect.
                """
                data = b'testdata' if method in ('POST', 'PUT') else None
                res = validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))

                headers = b''
                data_sent = b''
                if data is not None:
                    # The server echoes the request body first, followed by the request headers.
                    # If the body was dropped, what we just read is already part of the headers
                    data_sent += res.read(len(data))
                    if data_sent != data:
                        headers += data_sent
                        data_sent = b''

                headers += res.read()

                if assert_no_content or data is None:
                    assert b'Content-Type' not in headers
                    assert b'Content-Length' not in headers
                else:
                    assert b'Content-Type' in headers
                    assert b'Content-Length' in headers

                return data_sent.decode(), res.headers.get('method', '')

            # A 303 must either use GET or HEAD for subsequent request
            assert do_req(303, 'POST', True) == ('', 'GET')
            assert do_req(303, 'HEAD') == ('', 'HEAD')

            assert do_req(303, 'PUT', True) == ('', 'GET')

            # 301 and 302 turn POST only into a GET
            assert do_req(301, 'POST', True) == ('', 'GET')
            assert do_req(301, 'HEAD') == ('', 'HEAD')
            assert do_req(302, 'POST', True) == ('', 'GET')
            assert do_req(302, 'HEAD') == ('', 'HEAD')

            assert do_req(301, 'PUT') == ('testdata', 'PUT')
            assert do_req(302, 'PUT') == ('testdata', 'PUT')

            # 307 and 308 should not change method
            for m in ('POST', 'PUT'):
                assert do_req(307, m) == ('testdata', m)
                assert do_req(308, m) == ('testdata', m)

            assert do_req(307, 'HEAD') == ('', 'HEAD')
            assert do_req(308, 'HEAD') == ('', 'HEAD')

            # These should not redirect and instead raise an HTTPError
            for code in (300, 304, 305, 306):
                with pytest.raises(HTTPError):
                    do_req(code, 'GET')

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_request_cookie_header(self, handler):
        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
        with handler() as rh:
            # Specified Cookie header should be used
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' in res

            # Specified Cookie header should be removed on any redirect
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/308-to-headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' not in res

        # Specified Cookie header should override global cookiejar for that request
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp', port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
            path_specified=True, secure=False, expires=None, discard=False, comment=None,
            comment_url=None, rest={}))

        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read()
            assert b'Cookie: test=ytdlp' not in data
            assert b'Cookie: test=test' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_redirect_loop(self, handler):
        with handler() as rh:
            with pytest.raises(HTTPError, match='redirect loop'):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_incompleteread(self, handler):
        with handler(timeout=2) as rh:
            # The server announces more data than it sends; reading the body must fail
            with pytest.raises(IncompleteRead):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_cookies(self, handler):
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
            False, '/headers', True, False, None, False, None, None, {}))

        # Handler-wide cookiejar
        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Cookie: test=ytdlp' in data

        # Per request
        with handler() as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
            assert b'Cookie: test=ytdlp' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_headers(self, handler):

        with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
            # Global Headers
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Test1: test' in data

            # Per request headers, merged with global
            data = validate_and_send(rh, Request(
                f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read()
            assert b'Test1: test' in data
            assert b'Test2: changed' in data
            assert b'Test2: test2' not in data
            assert b'Test3: test3' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_timeout(self, handler):
        with handler() as rh:
            # Default timeout is 20 seconds, so this should go through
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3'))

        with handler(timeout=0.5) as rh:
            with pytest.raises(TransportError):
                validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))

            # Per request timeout, should override handler timeout
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_source_address(self, handler):
        source_address = f'127.0.0.{random.randint(5, 255)}'
        # on some systems these loopback addresses we need for testing may not be available
        # see: https://github.com/yt-dlp/yt-dlp/issues/8890
        verify_address_availability(source_address)
        with handler(source_address=source_address) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
            assert source_address == data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_gzip_trailing_garbage(self, handler):
        with handler() as rh:
            # Bytes following the gzip stream must be ignored
            data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
            assert data == '<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
    def test_brotli(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'br'}))
            assert res.headers.get('Content-Encoding') == 'br'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_deflate(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'deflate'}))
            assert res.headers.get('Content-Encoding') == 'deflate'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_gzip(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'gzip'}))
            assert res.headers.get('Content-Encoding') == 'gzip'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_multiple_encodings(self, handler):
        with handler() as rh:
            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
                res = validate_and_send(
                    rh, Request(
                        f'http://127.0.0.1:{self.http_port}/content-encoding',
                        headers={'ytdl-encoding': pair}))
                assert res.headers.get('Content-Encoding') == pair
                assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_unsupported_encoding(self, handler):
        with handler() as rh:
            # A body with an unknown Content-Encoding should be passed through untouched
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'unsupported'}))
            assert res.headers.get('Content-Encoding') == 'unsupported'
            assert res.read() == b'raw'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_read(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
            assert res.readable()
            # Partial reads must continue where the previous read stopped
            assert res.read(1) == b'H'
            assert res.read(3) == b'ost'
615
616
class TestHTTPProxy(TestRequestHandlerBase):
    """
    Proxy selection tests.

    In addition to the servers of the base class, two fake proxies are started:
    one labelled 'normal' and one labelled 'geo'. Each answers with its label
    followed by the requested URL, which shows which proxy handled a request.
    """

    @staticmethod
    def _spawn_proxy(label):
        """Start a fake proxy labelled `label`; returns (server, port, serving thread)"""
        server = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler(label))
        port = http_server_port(server)
        worker = threading.Thread(target=server.serve_forever, daemon=True)
        worker.start()
        return server, port, worker

    @classmethod
    def setup_class(cls):
        super().setup_class()
        # HTTP Proxy server
        cls.proxy, cls.proxy_port, cls.proxy_thread = cls._spawn_proxy('normal')
        # Geo proxy server
        cls.geo_proxy, cls.geo_port, cls.geo_proxy_thread = cls._spawn_proxy('geo')

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_http_proxy(self, handler):
        """Covers the global proxy, a per request proxy, and disabling the proxy per request"""
        normal_proxy_url = f'http://127.0.0.1:{self.proxy_port}'
        geo_proxy_url = f'http://127.0.0.1:{self.geo_port}'
        url = 'http://foo.com/bar'

        with handler(proxies={'http': normal_proxy_url}) as rh:
            # Global HTTP proxy
            body = validate_and_send(rh, Request(url)).read().decode()
            assert body == f'normal: {url}'

            # Per request proxy overrides global
            geo_request = Request(url, proxies={'http': geo_proxy_url})
            body = validate_and_send(rh, geo_request).read().decode()
            assert body == f'geo: {url}'

            # and setting to None disables all proxies for that request
            real_url = f'http://127.0.0.1:{self.http_port}/headers'
            direct_request = Request(real_url, proxies={'http': None})
            body = validate_and_send(rh, direct_request).read().decode()
            assert body != f'normal: {real_url}'
            assert 'Accept' in body

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_noproxy(self, handler):
        target = f'http://127.0.0.1:{self.http_port}/headers'
        exclusions = (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost')
        with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
            # NO_PROXY
            for no_proxy in exclusions:
                response = validate_and_send(rh, Request(target, proxies={'no': no_proxy}))
                nop_response = response.read().decode('utf-8')
                # The echoed request headers prove the real server was reached
                assert 'Accept' in nop_response

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_allproxy(self, handler):
        url = 'http://foo.com/bar'
        all_proxy = {'all': f'http://127.0.0.1:{self.proxy_port}'}
        with handler() as rh:
            raw = validate_and_send(rh, Request(url, proxies=all_proxy)).read()
            assert raw.decode('utf-8') == f'normal: {url}'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_http_proxy_with_idn(self, handler):
        proxies = {
            'http': f'http://127.0.0.1:{self.proxy_port}',
        }
        with handler(proxies=proxies) as rh:
            idn_request = Request('http://中文.tw/')
            response = rh.send(idn_request).read().decode()
            # b'xn--fiq228c' is '中文'.encode('idna')
            assert response == 'normal: http://xn--fiq228c.tw/'
690
691
class TestClientCertificate:
    """
    Client certificate tests against an HTTPS server that requires one.

    The server is set up with `ssl.CERT_REQUIRED` and verifies client certificates
    against `ca.crt` from the test certificate directory.
    """

    @classmethod
    def setup_class(cls):
        cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')

        # TLS context that insists on a client certificate
        context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        context.verify_mode = ssl.CERT_REQUIRED
        context.load_verify_locations(cafile=os.path.join(cls.certdir, 'ca.crt'))
        context.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None)

        cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
        cls.httpd.socket = context.wrap_socket(cls.httpd.socket, server_side=True)
        cls.port = http_server_port(cls.httpd)
        cls.server_thread = threading.Thread(target=cls.httpd.serve_forever, daemon=True)
        cls.server_thread.start()

    def _run_test(self, handler, **handler_kwargs):
        """Fetch /video.html from the server using a handler built with `handler_kwargs`"""
        target = f'https://127.0.0.1:{self.port}/video.html'
        # Disable client-side validation of unacceptable self-signed testcert.pem
        # The test is of a check on the server side, so unaffected
        with handler(verify=False, **handler_kwargs) as rh:
            response = validate_and_send(rh, Request(target))
            response.read().decode()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_combined_nopass(self, handler):
        # Certificate and unencrypted key in a single file
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_nocombined_nopass(self, handler):
        # Certificate and unencrypted key in separate files
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'client.key'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_combined_pass(self, handler):
        # Certificate and password-protected key in a single file
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_nocombined_pass(self, handler):
        # Certificate and password-protected key in separate files
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)
746
747
class TestUrllibRequestHandler(TestRequestHandlerBase):
    """Tests that only apply to the urllib based request handler"""

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_file_urls(self, handler):
        # See https://github.com/ytdl-org/youtube-dl/issues/8227
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tf.write(b'foobar')

        # Remove the temporary file even when one of the checks below fails
        try:
            req = Request(pathlib.Path(tf.name).as_uri())
            with handler() as rh:
                # file:// URLs are rejected unless explicitly enabled
                with pytest.raises(UnsupportedRequest):
                    rh.validate(req)

                # Test that urllib never loaded FileHandler
                with pytest.raises(TransportError):
                    rh.send(req)

            with handler(enable_file_urls=True) as rh:
                res = validate_and_send(rh, req)
                assert res.read() == b'foobar'
                res.close()
        finally:
            os.unlink(tf.name)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_error_returns_content(self, handler):
        # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
        def get_response():
            # Only the response leaves this function; the HTTPError itself goes out of scope
            with handler() as rh:
                # headers url
                try:
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
                except HTTPError as e:
                    return e.response

        assert get_response().read() == b'<html></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert_error_text(self, handler):
        # Check the output of the error message
        with handler() as rh:
            with pytest.raises(
                CertificateVerifyError,
                match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
            ):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('req,match,version_check', [
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
        # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
        (
            Request('http://127.0.0.1', method='GET\n'),
            'method can\'t contain control characters',
            lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
        # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
        (
            Request('http://127.0.0. 1', method='GET'),
            'URL can\'t contain control characters',
            lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
        (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
    ])
    def test_httplib_validation_errors(self, handler, req, match, version_check):
        # `version_check` returns True for Python versions that lack the validation being tested
        if version_check and version_check(sys.version_info):
            pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')

        with handler() as rh:
            with pytest.raises(RequestError, match=match) as exc_info:
                validate_and_send(rh, req)
            # Validation failures must surface as RequestError, not as the TransportError subclass
            assert not isinstance(exc_info.value, TransportError)
821
822
class TestRequestsRequestHandler(TestRequestHandlerBase):
    """Checks how errors from requests/urllib3 are translated by the Requests handler."""

    # The exceptions are wrapped in lambdas so they are only constructed when a test runs
    @pytest.mark.parametrize('raised,expected', [
        (lambda: requests.exceptions.ConnectTimeout(), TransportError),
        (lambda: requests.exceptions.ReadTimeout(), TransportError),
        (lambda: requests.exceptions.Timeout(), TransportError),
        (lambda: requests.exceptions.ConnectionError(), TransportError),
        (lambda: requests.exceptions.ProxyError(), ProxyError),
        (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
        (lambda: requests.exceptions.SSLError(), SSLError),
        (lambda: requests.exceptions.InvalidURL(), RequestError),
        (lambda: requests.exceptions.InvalidHeader(), RequestError),
        # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
        (lambda: urllib3.exceptions.HTTPError(), TransportError),
        (lambda: requests.exceptions.RequestException(), RequestError)
        # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
    ])
    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
    def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
        """An error raised while sending is re-raised as exactly the expected type."""

        class FailingSession:
            # Stand-in session whose request() always raises the error under test
            def request(self, *args, **kwargs):
                raise raised()

        with handler() as rh:
            monkeypatch.setattr(rh, '_get_instance', lambda *args, **kwargs: FailingSession())

            with pytest.raises(expected) as exc_info:
                rh.send(Request('http://fake'))

            # `is`, not isinstance: a subclass of the expected error is not acceptable
            assert exc_info.type is expected

    @pytest.mark.parametrize('raised,expected,match', [
        (lambda: urllib3.exceptions.SSLError(), SSLError, None),
        (lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
        (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
        (lambda: urllib3.exceptions.ProtocolError(), TransportError, None),
        (lambda: urllib3.exceptions.DecodeError(), TransportError, None),
        (lambda: urllib3.exceptions.HTTPError(), TransportError, None),  # catch-all
        (
            lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)),
            IncompleteRead,
            '3 bytes read, 4 more expected'
        ),
        (
            lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)),
            IncompleteRead,
            '3 bytes read, 5 more expected'
        ),
    ])
    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
    def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
        """An error raised while reading the response is re-raised as exactly the expected type."""
        from requests.models import Response as RequestsResponse
        from urllib3.response import HTTPResponse as Urllib3Response

        from yt_dlp.networking._requests import RequestsResponseAdapter

        def failing_read(*args, **kwargs):
            raise raised()

        wrapped_response = RequestsResponse()
        wrapped_response.raw = Urllib3Response(body=b'', status=200)
        adapter = RequestsResponseAdapter(wrapped_response)
        monkeypatch.setattr(adapter.fp, 'read', failing_read)

        with pytest.raises(expected, match=match) as exc_info:
            adapter.read()

        assert exc_info.type is expected
891
892
def run_validation(handler, error, req, **handler_kwargs):
    """Validate `req` with a handler built from `handler(**handler_kwargs)`.

    When `error` is truthy it is the exception type that validation is
    expected to raise; otherwise validation is expected to succeed.
    """
    with handler(**handler_kwargs) as rh:
        if not error:
            rh.validate(req)
            return
        with pytest.raises(error):
            rh.validate(req)
900
901
class TestRequestHandlerValidation:
    """Checks RequestHandler.validate() for URL schemes, proxies and extensions.

    In the tables below a handler is either the name of a real handler
    (passed to the indirect `handler` fixture) or one of the stub classes
    defined here. An expectation is either an exception type that validation
    must raise, or False when validation must succeed.
    """

    class ValidationRH(RequestHandler):
        # Stub handler; _send always raises RequestError
        def _send(self, request):
            raise RequestError('test')

    class NoCheckRH(ValidationRH):
        # Every support list is set to None and extensions are simply discarded
        _SUPPORTED_FEATURES = None
        _SUPPORTED_PROXY_SCHEMES = None
        _SUPPORTED_URL_SCHEMES = None

        def _check_extensions(self, extensions):
            extensions.clear()

    class HTTPSupportedRH(ValidationRH):
        # Declares only the http URL scheme as supported
        _SUPPORTED_URL_SCHEMES = ('http',)

    URL_SCHEME_TESTS = [
        # (handler, [(url scheme, expectation, handler kwargs)])
        ('Urllib', [
            ('http', False, {}),
            ('https', False, {}),
            ('data', False, {}),
            ('ftp', False, {}),
            ('file', UnsupportedRequest, {}),
            ('file', False, {'enable_file_urls': True}),
        ]),
        ('Requests', [
            ('http', False, {}),
            ('https', False, {}),
        ]),
        ('Websockets', [
            ('ws', False, {}),
            ('wss', False, {}),
        ]),
        (NoCheckRH, [('http', False, {})]),
        (ValidationRH, [('http', UnsupportedRequest, {})])
    ]

    PROXY_SCHEME_TESTS = [
        # (handler, request scheme, [(proxy scheme, expectation)])
        ('Urllib', 'http', [
            ('http', False),
            ('https', UnsupportedRequest),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
            ('socks', UnsupportedRequest),
        ]),
        ('Requests', 'http', [
            ('http', False),
            ('https', False),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
        ]),
        ('Websockets', 'ws', [('http', UnsupportedRequest)]),
        # Each stub handler is listed once; repeating an entry only duplicates test cases
        (NoCheckRH, 'http', [('http', False)]),
        (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
    ]

    PROXY_KEY_TESTS = [
        # (handler, [(proxy key, expectation)])
        ('Urllib', [
            ('all', False),
            ('unrelated', False),
        ]),
        ('Requests', [
            ('all', False),
            ('unrelated', False),
        ]),
        (NoCheckRH, [('all', False)]),
        (HTTPSupportedRH, [('all', UnsupportedRequest)]),
        (HTTPSupportedRH, [('no', UnsupportedRequest)]),
    ]

    EXTENSION_TESTS = [
        # (handler, request scheme, [(extensions, expectation)])
        ('Urllib', 'http', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'cookiejar': CookieJar()}, AssertionError),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        ('Requests', 'http', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        (NoCheckRH, 'http', [
            ({'cookiejar': 'notacookiejar'}, False),
            ({'somerandom': 'test'}, False),  # but any extension is allowed through
        ]),
        ('Websockets', 'ws', [
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'timeout': 2}, False),
        ]),
    ]

    @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
        (handler_tests[0], scheme, fail, handler_kwargs)
        for handler_tests in URL_SCHEME_TESTS
        for scheme, fail, handler_kwargs in handler_tests[1]
    ], indirect=['handler'])
    def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
        """Validate a bare `<scheme>://` URL against each handler."""
        run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))

    @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False)], indirect=['handler'])
    def test_no_proxy(self, handler, fail):
        """The `no` proxy key is checked both on the request and on the handler."""
        run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
        run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})

    @pytest.mark.parametrize('handler,proxy_key,fail', [
        (handler_tests[0], proxy_key, fail)
        for handler_tests in PROXY_KEY_TESTS
        for proxy_key, fail in handler_tests[1]
    ], indirect=['handler'])
    def test_proxy_key(self, handler, proxy_key, fail):
        """Proxy keys are checked both on the request and on the handler."""
        run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
        run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})

    @pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
        (handler_tests[0], handler_tests[1], scheme, fail)
        for handler_tests in PROXY_SCHEME_TESTS
        for scheme, fail in handler_tests[2]
    ], indirect=['handler'])
    def test_proxy_scheme(self, handler, req_scheme, scheme, fail):
        """Proxy URL schemes are checked both on the request and on the handler."""
        run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
        run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})

    @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests'], indirect=True)
    def test_empty_proxy(self, handler):
        """A proxy value of None must pass validation."""
        run_validation(handler, False, Request('http://', proxies={'http': None}))
        run_validation(handler, False, Request('http://'), proxies={'http': None})

    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_invalid_proxy_url(self, handler, proxy_url):
        """Proxy URLs without a scheme must be rejected."""
        run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))

    @pytest.mark.parametrize('handler,scheme,extensions,fail', [
        (handler_tests[0], handler_tests[1], extensions, fail)
        for handler_tests in EXTENSION_TESTS
        for extensions, fail in handler_tests[2]
    ], indirect=['handler'])
    def test_extension(self, handler, scheme, extensions, fail):
        """Validate the request extensions accepted by each handler."""
        run_validation(
            handler, fail, Request(f'{scheme}://', extensions=extensions))

    def test_invalid_request_type(self):
        """Both validate() and send() raise TypeError for a non-Request argument."""
        rh = self.ValidationRH(logger=FakeLogger())
        for method in (rh.validate, rh.send):
            with pytest.raises(TypeError, match='Expected an instance of Request'):
                method('not a request')
1064
1065
class FakeResponse(Response):
    """Empty-bodied response that remembers the request it was created for."""

    def __init__(self, request):
        # XXX: we could make request part of standard response interface
        self.request = request
        empty_body = io.BytesIO(b'')
        super().__init__(fp=empty_body, headers={}, url=request.url)
1071
1072
class FakeRH(RequestHandler):
    """Handler stub: answers with a FakeResponse, or raises SSLError for `ssl://` URLs."""

    def _validate(self, request):
        # No checks are performed here
        return

    def _send(self, request: Request):
        ssl_prefix = 'ssl://'
        if not request.url.startswith(ssl_prefix):
            return FakeResponse(request)
        # Whatever follows the prefix becomes the error message
        raise SSLError(request.url[len(ssl_prefix):])
1082
1083
class FakeRHYDL(FakeYDL):
    """FakeYDL whose request director is built from FakeRH only."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        fake_director = self.build_request_director([FakeRH])
        self._request_director = fake_director
1088
1089
class AllUnsupportedRHYDL(FakeYDL):
    """FakeYDL whose only handler declares empty feature, proxy-scheme and URL-scheme lists."""

    def __init__(self, *args, **kwargs):

        class UnsupportedRH(RequestHandler):
            _SUPPORTED_FEATURES = ()
            _SUPPORTED_PROXY_SCHEMES = ()
            _SUPPORTED_URL_SCHEMES = ()

            def _send(self, request: Request):
                pass

        super().__init__(*args, **kwargs)
        unsupported_director = self.build_request_director([UnsupportedRH])
        self._request_director = unsupported_director
1104
1105
class TestRequestDirector:
    """Checks handler registration, dispatch and preferences of RequestDirector."""

    def test_handler_operations(self):
        """Handlers are stored under their RH_KEY and can be replaced or removed."""
        director = RequestDirector(logger=FakeLogger())
        handler = FakeRH(logger=FakeLogger())
        director.add_handler(handler)
        assert director.handlers.get(FakeRH.RH_KEY) is handler

        # Handler should overwrite
        handler2 = FakeRH(logger=FakeLogger())
        director.add_handler(handler2)
        assert director.handlers.get(FakeRH.RH_KEY) is not handler
        assert director.handlers.get(FakeRH.RH_KEY) is handler2
        assert len(director.handlers) == 1

        class AnotherFakeRH(FakeRH):
            pass
        director.add_handler(AnotherFakeRH(logger=FakeLogger()))
        assert len(director.handlers) == 2
        assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY

        director.handlers.pop(FakeRH.RH_KEY, None)
        assert director.handlers.get(FakeRH.RH_KEY) is None
        assert len(director.handlers) == 1

        # RequestErrors should passthrough
        with pytest.raises(SSLError):
            director.send(Request('ssl://something'))

    def test_send(self):
        """Sending fails without any handler and succeeds once one is added."""
        director = RequestDirector(logger=FakeLogger())
        with pytest.raises(RequestError):
            director.send(Request('any://'))
        director.add_handler(FakeRH(logger=FakeLogger()))
        assert isinstance(director.send(Request('http://')), FakeResponse)

    def test_unsupported_handlers(self):
        """Requests a handler does not support fall through to the next handler."""
        class SupportedRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        director = RequestDirector(logger=FakeLogger())
        director.add_handler(SupportedRH(logger=FakeLogger()))
        director.add_handler(FakeRH(logger=FakeLogger()))

        # First should take preference
        assert director.send(Request('http://')).read() == b'supported'
        assert director.send(Request('any://')).read() == b''

        director.handlers.pop(FakeRH.RH_KEY)
        with pytest.raises(NoSupportingHandlers):
            director.send(Request('any://'))

    def test_unexpected_error(self):
        """An unexpected handler error is reported, but is not fatal if another handler works."""
        director = RequestDirector(logger=FakeLogger())

        class UnexpectedRH(FakeRH):
            def _send(self, request: Request):
                raise TypeError('something')

        # Pass a logger instance (not the FakeLogger class), like every other handler here
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
            director.send(Request('any://'))

        director.handlers.clear()
        assert len(director.handlers) == 0

        # Should not be fatal
        director.add_handler(FakeRH(logger=FakeLogger()))
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        assert director.send(Request('any://'))

    def test_preference(self):
        """A registered preference function decides which handler serves a request."""
        director = RequestDirector(logger=FakeLogger())
        director.add_handler(FakeRH(logger=FakeLogger()))

        class SomeRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        def some_preference(rh, request):
            # SomeRH scores 100 when the request has a `prefer` header and -1
            # otherwise; every other handler scores 0
            return (0 if not isinstance(rh, SomeRH)
                    else 100 if 'prefer' in request.headers
                    else -1)

        director.add_handler(SomeRH(logger=FakeLogger()))
        director.preferences.add(some_preference)

        assert director.send(Request('http://')).read() == b''
        assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
1200
1201
1202 # XXX: do we want to move this to test_YoutubeDL.py?
class TestYoutubeDLNetworking:
    """Checks how YoutubeDL options and requests are translated for the networking layer."""

    @staticmethod
    def build_handler(ydl, handler: RequestHandler = FakeRH):
        """Build a request director holding only `handler` and return that handler's instance."""
        return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)

    def test_compat_opener(self):
        """The deprecated `_opener` attribute still yields a urllib OpenerDirector."""
        with FakeYDL() as ydl:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                assert isinstance(ydl._opener, urllib.request.OpenerDirector)

    @pytest.mark.parametrize('proxy,expected', [
        ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
        ('', {'all': '__noproxy__'}),
        (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'})  # env, set https
    ])
    def test_proxy(self, proxy, expected):
        """The `proxy` option overrides the environment; without it the environment is used."""
        old_http_proxy = os.environ.get('HTTP_PROXY')
        try:
            os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081'  # ensure that provided proxies override env
            with FakeYDL({'proxy': proxy}) as ydl:
                assert ydl.proxies == expected
        finally:
            # Put the environment back exactly as it was, removing the variable
            # if it did not exist before, so it cannot leak into later tests
            if old_http_proxy is None:
                os.environ.pop('HTTP_PROXY', None)
            else:
                os.environ['HTTP_PROXY'] = old_http_proxy

    def test_compat_request(self):
        """urlopen() accepts URL strings and urllib requests, and asserts on None."""
        with FakeRHYDL() as ydl:
            assert ydl.urlopen('test://')
            urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
            urllib_req.add_unredirected_header('Cookie', 'bob=bob')
            urllib_req.timeout = 2
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                req = ydl.urlopen(urllib_req).request
                assert req.url == urllib_req.get_full_url()
                assert req.data == urllib_req.data
                assert req.method == urllib_req.get_method()
                assert 'X-Test' in req.headers
                assert 'Cookie' in req.headers
                assert req.extensions.get('timeout') == 2

            with pytest.raises(AssertionError):
                ydl.urlopen(None)

    def test_extract_basic_auth(self):
        """Credentials in the URL become a Basic Authorization header."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('http://user:pass@foo.bar'))
            assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'

    def test_sanitize_url(self):
        """A mistyped `httpss` scheme is corrected to `https`."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('httpss://foo.bar'))
            assert res.request.url == 'https://foo.bar'

    def test_file_urls_error(self):
        """file:// URLs are rejected with an explanatory message by default."""
        # use urllib handler
        with FakeYDL() as ydl:
            with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
                ydl.urlopen('file://')

    @pytest.mark.parametrize('scheme', (['ws', 'wss']))
    def test_websocket_unavailable_error(self, scheme):
        """WebSocket URLs with no supporting handler produce a dedicated message."""
        with AllUnsupportedRHYDL() as ydl:
            with pytest.raises(RequestError, match=r'This request requires WebSocket support'):
                ydl.urlopen(f'{scheme}://')

    def test_legacy_server_connect_error(self):
        """Specific SSL errors suggest --legacy-server-connect; other SSL errors pass through."""
        with FakeRHYDL() as ydl:
            for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
                    ydl.urlopen(f'ssl://{error}')

            with pytest.raises(SSLError, match='testerror'):
                ydl.urlopen('ssl://testerror')

    @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
        ('http', '__noproxy__', None),
        ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
        ('https', 'example.com', 'http://example.com'),
        ('https', '//example.com', 'http://example.com'),
        ('https', 'socks5://example.com', 'socks5h://example.com'),
        ('http', 'socks://example.com', 'socks4://example.com'),
        ('http', 'socks4://example.com', 'socks4://example.com'),
        ('unrelated', '/bad/proxy', '/bad/proxy'),  # clean_proxies should ignore bad proxies
    ])
    def test_clean_proxy(self, proxy_key, proxy_url, expected):
        """Proxy URLs are normalised both in urlopen() and when building a handler."""
        # proxies should be cleaned in urlopen()
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
            assert req.proxies[proxy_key] == expected

        # and should also be cleaned when building the handler
        env_key = f'{proxy_key.upper()}_PROXY'
        old_env_proxy = os.environ.get(env_key)
        try:
            os.environ[env_key] = proxy_url  # ensure that provided proxies override env
            with FakeYDL() as ydl:
                rh = self.build_handler(ydl)
                assert rh.proxies[proxy_key] == expected
        finally:
            # Remove the variable again when it did not exist before this test,
            # otherwise e.g. NO_PROXY would stay set for every later test
            if old_env_proxy is None:
                os.environ.pop(env_key, None)
            else:
                os.environ[env_key] = old_env_proxy

    def test_clean_proxy_header(self):
        """The `ytdl-request-proxy` header is removed and turned into the `all` proxy."""
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
            assert 'ytdl-request-proxy' not in req.headers
            assert req.proxies == {'all': 'http://foo.bar'}

        with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'ytdl-request-proxy' not in rh.headers
            assert rh.proxies == {'all': 'http://foo.bar'}

    def test_clean_header(self):
        """Internal control headers are stripped from requests and handler headers."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
            assert 'Youtubedl-no-compression' not in res.request.headers
            assert res.request.headers.get('Accept-Encoding') == 'identity'

        with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'Youtubedl-no-compression' not in rh.headers
            assert rh.headers.get('Accept-Encoding') == 'identity'

        with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'Ytdl-socks-proxy' not in rh.headers

    def test_build_handler_params(self):
        """YoutubeDL parameters are mapped onto the matching handler attributes."""
        with FakeYDL({
            'http_headers': {'test': 'testtest'},
            'socket_timeout': 2,
            'proxy': 'http://127.0.0.1:8080',
            'source_address': '127.0.0.45',
            'debug_printtraffic': True,
            'compat_opts': ['no-certifi'],
            'nocheckcertificate': True,
            'legacyserverconnect': True,
        }) as ydl:
            rh = self.build_handler(ydl)
            assert rh.headers.get('test') == 'testtest'
            assert 'Accept' in rh.headers  # ensure std_headers are still there
            assert rh.timeout == 2
            assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
            assert rh.source_address == '127.0.0.45'
            assert rh.verbose is True
            assert rh.prefer_system_certs is True
            assert rh.verify is False
            assert rh.legacy_ssl_support is True

    @pytest.mark.parametrize('ydl_params', [
        {'client_certificate': 'fakecert.crt'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
        {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
    ])
    def test_client_certificate(self, ydl_params):
        """Client certificate options reach the handler unchanged."""
        with FakeYDL(ydl_params) as ydl:
            rh = self.build_handler(ydl)
            assert rh._client_cert == ydl_params  # XXX: Too bound to implementation

    def test_urllib_file_urls(self):
        """The `enable_file_urls` option is passed through to the urllib handler."""
        with FakeYDL({'enable_file_urls': False}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is False

        with FakeYDL({'enable_file_urls': True}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is True

    def test_compat_opt_prefer_urllib(self):
        """The `prefer-legacy-http-handler` compat option registers a preference for urllib."""
        # This assumes urllib only has a preference when this compat opt is given
        with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
            director = ydl.build_request_director([UrllibRH])
            assert len(director.preferences) == 1
            assert director.preferences.pop()(UrllibRH, None)
1382
1383
class TestRequest:
    """Checks the behaviour of the Request object itself."""

    def test_query(self):
        """Query parameters are merged into the URL on creation and on update."""
        request = Request('http://example.com?q=something', query={'v': 'xyz'})
        assert request.url == 'http://example.com?q=something&v=xyz'

        request.update(query={'v': '123'})
        assert request.url == 'http://example.com?q=something&v=123'

        request.update(url='http://example.com', query={'v': 'xyz'})
        assert request.url == 'http://example.com?v=xyz'

    def test_method(self):
        """The method follows the presence of data until it is set explicitly."""
        request = Request('http://example.com')
        assert request.method == 'GET'

        # Without an explicit method, data decides between GET and POST
        request.data = b'test'
        assert request.method == 'POST'
        request.data = None
        assert request.method == 'GET'

        # An explicit method is kept even after the data is removed
        request.data = b'test2'
        request.method = 'PUT'
        assert request.method == 'PUT'
        request.data = None
        assert request.method == 'PUT'

        with pytest.raises(TypeError):
            request.method = 1

    def test_request_helpers(self):
        """The helper request classes preset their HTTP method."""
        head_request = HEADRequest('http://example.com')
        put_request = PUTRequest('http://example.com')
        assert head_request.method == 'HEAD'
        assert put_request.method == 'PUT'

    def test_headers(self):
        """Headers are held in an HTTPHeaderDict, whichever way they are assigned."""
        request = Request('http://example.com', headers={'tesT': 'test'})
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        request.update(headers={'teSt2': 'test2'})
        assert request.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})

        # an HTTPHeaderDict is stored as-is
        replacement = HTTPHeaderDict({'test': 'test'})
        request.headers = replacement
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        assert request.headers is replacement

        # test converts dict to case insensitive dict
        plain_dict = {'test2': 'test2'}
        request.headers = plain_dict
        assert isinstance(request.headers, HTTPHeaderDict)
        assert request.headers is not plain_dict

        with pytest.raises(TypeError):
            request.headers = None

    def test_data_type(self):
        """Bytes, iterables of bytes and file-like objects are accepted; str and dict are not."""
        request = Request('http://example.com')
        assert request.data is None

        # test bytes is allowed
        request.data = b'test'
        assert request.data == b'test'

        # test iterable of bytes is allowed
        chunks = [b'test', b'test2']
        request.data = chunks
        assert request.data == chunks

        # test file-like object is allowed
        stream = io.BytesIO(b'test')
        request.data = stream
        assert request.data == stream

        # common mistake: test str not allowed
        with pytest.raises(TypeError):
            request.data = 'test'
        assert request.data != 'test'

        # common mistake: test dict is not allowed
        with pytest.raises(TypeError):
            request.data = {'test': 'test'}
        assert request.data != {'test': 'test'}

    def test_content_length_header(self):
        """Content-Length is dropped when the data changes or when there is no data."""
        request = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
        assert request.headers.get('Content-Length') == '0'

        request.data = b'test'
        assert 'Content-Length' not in request.headers

        request = Request('http://example.com', headers={'Content-Length': '10'})
        assert 'Content-Length' not in request.headers

    def test_content_type_header(self):
        """Content-Type is kept while data exists, dropped without data and defaulted afterwards."""
        request = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
        assert request.headers.get('Content-Type') == 'test'

        request.data = b'test2'
        assert request.headers.get('Content-Type') == 'test'

        request.data = None
        assert 'Content-Type' not in request.headers

        request.data = b'test3'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_update_req(self):
        """update() with an empty payload still turns the request into a POST."""
        request = Request('http://example.com')
        assert request.data is None
        assert request.method == 'GET'
        assert 'Content-Type' not in request.headers

        # Test that zero-byte payloads will be sent
        request.update(data=b'')
        assert request.data == b''
        assert request.method == 'POST'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_proxies(self):
        """Proxies given at creation are exposed unchanged."""
        proxy_map = {'http': 'http://127.0.0.1:8080'}
        request = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'})
        assert request.proxies == proxy_map

    def test_extensions(self):
        """Extensions given at creation are exposed unchanged."""
        request = Request(url='http://example.com', extensions={'timeout': 2})
        assert request.extensions == {'timeout': 2}

    def test_copy(self):
        """copy() duplicates headers, proxies and extensions but shares the data object."""
        original = Request(
            url='http://example.com',
            extensions={'cookiejar': CookieJar()},
            headers={'Accept-Encoding': 'br'},
            proxies={'http': 'http://127.0.0.1'},
            data=[b'123']
        )
        duplicate = original.copy()
        assert duplicate is not original
        assert duplicate.url == original.url
        assert duplicate.headers == original.headers
        assert duplicate.headers is not original.headers
        assert duplicate.proxies == original.proxies
        assert duplicate.proxies is not original.proxies

        # Data is not able to be copied
        assert duplicate.data == original.data
        assert duplicate.data is original.data

        # Shallow copy extensions
        assert duplicate.extensions is not original.extensions
        assert duplicate.extensions['cookiejar'] == original.extensions['cookiejar']

        # Subclasses are copied by default
        class AnotherRequest(Request):
            pass

        subclass_request = AnotherRequest(url='http://127.0.0.1')
        assert isinstance(subclass_request.copy(), AnotherRequest)

    def test_url(self):
        """URLs are escaped and normalised; a non-string URL raises TypeError."""
        request = Request(url='https://фtest.example.com/ some spaceв?ä=c',)
        assert request.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'

        scheme_relative = Request(url='//example.com')
        assert scheme_relative.url == 'http://example.com'

        with pytest.raises(TypeError):
            Request(url='https://').url = None
1536
1537
class TestResponse:
    """Checks the behaviour of the Response object itself."""

    @pytest.mark.parametrize('reason,status,expected', [
        ('custom', 200, 'custom'),
        (None, 404, 'Not Found'),  # fallback status
        ('', 403, 'Forbidden'),
        (None, 999, None)
    ])
    def test_reason(self, reason, status, expected):
        """A missing or empty reason falls back to the standard phrase for the status, if any."""
        response = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
        assert response.reason == expected

    def test_headers(self):
        """Repeated headers are all kept and lookups ignore case."""
        message = Message()
        for name, value in (('Test', 'test'), ('Test', 'test2'), ('content-encoding', 'br')):
            message.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=message, url='test://')
        assert response.headers.get_all('test') == ['test', 'test2']
        assert 'Content-Encoding' in response.headers

    def test_get_header(self):
        """get_header() joins repeated values, except Set-Cookie where only the first is returned."""
        message = Message()
        for name, value in (
            ('Set-Cookie', 'cookie1'),
            ('Set-cookie', 'cookie2'),
            ('Test', 'test'),
            ('Test', 'test2'),
        ):
            message.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=message, url='test://')
        assert response.get_header('test') == 'test, test2'
        assert response.get_header('set-Cookie') == 'cookie1'
        assert response.get_header('notexist', 'default') == 'default'

    def test_compat(self):
        """The deprecated urllib-style accessors mirror the current attributes."""
        response = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
        with warnings.catch_warnings():
            # The compat accessors are deprecated; silence their warnings here
            warnings.simplefilter('ignore', category=DeprecationWarning)
            assert response.code == response.getcode() == response.status
            assert response.geturl() == response.url
            assert response.info() is response.headers
            assert response.getheader('test') == response.get_header('test')