jfr.im git - yt-dlp.git/blob - test/test_networking.py
[utils] `traverse_obj`: Move `is_user_input` into output template (#8673)
[yt-dlp.git] / test / test_networking.py
1 #!/usr/bin/env python3
2
3 # Allow direct execution
4 import os
5 import sys
6
7 import pytest
8
9 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
11 import gzip
12 import http.client
13 import http.cookiejar
14 import http.server
15 import io
16 import pathlib
17 import random
18 import ssl
19 import tempfile
20 import threading
21 import time
22 import urllib.error
23 import urllib.request
24 import warnings
25 import zlib
26 from email.message import Message
27 from http.cookiejar import CookieJar
28
29 from test.helper import FakeYDL, http_server_port
30 from yt_dlp.cookies import YoutubeDLCookieJar
31 from yt_dlp.dependencies import brotli, requests, urllib3
32 from yt_dlp.networking import (
33 HEADRequest,
34 PUTRequest,
35 Request,
36 RequestDirector,
37 RequestHandler,
38 Response,
39 )
40 from yt_dlp.networking._urllib import UrllibRH
41 from yt_dlp.networking.exceptions import (
42 CertificateVerifyError,
43 HTTPError,
44 IncompleteRead,
45 NoSupportingHandlers,
46 ProxyError,
47 RequestError,
48 SSLError,
49 TransportError,
50 UnsupportedRequest,
51 )
52 from yt_dlp.utils._utils import _YDLLogger as FakeLogger
53 from yt_dlp.utils.networking import HTTPHeaderDict
54
55 from test.conftest import validate_and_send
56
57 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
58
59
60 def _build_proxy_handler(name):
61 class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
62 proxy_name = name
63
64 def log_message(self, format, *args):
65 pass
66
67 def do_GET(self):
68 self.send_response(200)
69 self.send_header('Content-Type', 'text/plain; charset=utf-8')
70 self.end_headers()
71 self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode())
72 return HTTPTestRequestHandler
73
74
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    """Request handler for the local HTTP/HTTPS servers used by the networking tests.

    Each ``do_*`` method dispatches on ``self.path`` to a canned response
    (redirects, header echo, compressed bodies, truncated bodies, delays, ...).
    Paths that match no route are answered with ``_status(404)``.
    """

    protocol_version = 'HTTP/1.1'

    def log_message(self, format, *args):
        # Suppress the per-request log lines
        pass

    @staticmethod
    def _gzip(data):
        """Return *data* compressed as a gzip stream."""
        buf = io.BytesIO()
        with gzip.GzipFile(fileobj=buf, mode='wb') as f:
            f.write(data)
        return buf.getvalue()

    def _send_body(self, status, content_type, payload):
        """Send a response with *status*, Content-Type and Content-Length headers, then *payload*."""
        self.send_response(status)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _headers(self):
        """Echo the received request headers back as the response body."""
        # The body is the raw header text, although it is labelled application/json
        self._send_body(200, 'application/json', str(self.headers).encode())

    def _redirect(self):
        """Redirect to ``/method`` using the status code embedded in ``/redirect_<code>``."""
        self.send_response(int(self.path[len('/redirect_'):]))
        self.send_header('Location', '/method')
        self.send_header('Content-Length', '0')
        self.end_headers()

    def _method(self, method, payload=None):
        """Report the request method in a ``Method`` response header, followed by *payload* if any."""
        self.send_response(200)
        self.send_header('Content-Length', str(len(payload or '')))
        self.send_header('Method', method)
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def _status(self, status):
        """Send a small HTML page with the given response *status*."""
        payload = f'<html>{status} NOT FOUND</html>'.encode()
        self._send_body(int(status), 'text/html; charset=utf-8', payload)

    def _read_data(self):
        """Read the request body as announced by ``Content-Length``.

        Returns ``b''`` when the request carries no ``Content-Length`` header,
        so that callers can always concatenate the result with other bytes.
        """
        if 'Content-Length' in self.headers:
            return self.rfile.read(int(self.headers['Content-Length']))
        return b''

    def do_POST(self):
        # The request body is always consumed, even for routes that ignore it
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('POST', data)
        elif self.path.startswith('/headers'):
            self._headers()
        else:
            self._status(404)

    def do_HEAD(self):
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('HEAD')
        else:
            self._status(404)

    def do_PUT(self):
        # The request body is always consumed, even for routes that ignore it
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('PUT', data)
        else:
            self._status(404)

    def do_GET(self):
        html_payload = b'<html><video src="/vid.mp4" /></html>'
        html_type = 'text/html; charset=utf-8'

        if self.path in ('/video.html', '/%E4%B8%AD%E6%96%87.html', '/%c7%9f'):
            self._send_body(200, html_type, html_payload)
        elif self.path == '/vid.mp4':
            self._send_body(200, 'video/mp4', b'\x00\x00\x00\x00\x20\x66\x74[video]')
        elif self.path.startswith('/redirect_loop'):
            # Redirect to itself
            self.send_response(301)
            self.send_header('Location', self.path)
            self.send_header('Content-Length', '0')
            self.end_headers()
        elif self.path == '/redirect_dotsegments':
            self.send_response(301)
            # redirect to /headers but with dot segments before
            self.send_header('Location', '/a/b/./../../headers')
            self.send_header('Content-Length', '0')
            self.end_headers()
        elif self.path.startswith('/redirect_'):
            # Must come after the more specific /redirect_* routes above
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('GET', str(self.headers).encode())
        elif self.path.startswith('/headers'):
            self._headers()
        elif self.path.startswith('/308-to-headers'):
            self.send_response(308)
            self.send_header('Location', '/headers')
            self.send_header('Content-Length', '0')
            self.end_headers()
        elif self.path == '/trailing_garbage':
            # A valid gzip stream followed by extra bytes
            compressed = self._gzip(html_payload) + b'trailing garbage'
            self.send_response(200)
            self.send_header('Content-Type', html_type)
            self.send_header('Content-Encoding', 'gzip')
            self.send_header('Content-Length', str(len(compressed)))
            self.end_headers()
            self.wfile.write(compressed)
        elif self.path == '/302-non-ascii-redirect':
            # Despite the path name, the status sent here is 301
            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
            self.send_response(301)
            self.send_header('Location', new_url)
            self.send_header('Content-Length', '0')
            self.end_headers()
        elif self.path == '/content-encoding':
            # Apply the encodings named in the `ytdl-encoding` request header, in order
            encodings = self.headers.get('ytdl-encoding', '')
            payload = html_payload
            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
                if encoding == 'br' and brotli:
                    payload = brotli.compress(payload)
                elif encoding == 'gzip':
                    payload = self._gzip(payload)
                elif encoding == 'deflate':
                    payload = zlib.compress(payload)
                elif encoding == 'unsupported':
                    payload = b'raw'
                    break
                else:
                    self._status(415)
                    return
            self.send_response(200)
            self.send_header('Content-Encoding', encodings)
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)
        elif self.path.startswith('/gen_'):
            # Respond with whatever status code follows the `/gen_` prefix
            self._send_body(int(self.path[len('/gen_'):]), html_type, b'<html></html>')
        elif self.path.startswith('/incompleteread'):
            payload = b'<html></html>'
            self.send_response(200)
            self.send_header('Content-Type', html_type)
            # Announce far more data than is actually written
            self.send_header('Content-Length', '234234')
            self.end_headers()
            self.wfile.write(payload)
            self.finish()
        elif self.path.startswith('/timeout_'):
            # Delay the response by the number of seconds following the prefix
            time.sleep(int(self.path[len('/timeout_'):]))
            self._headers()
        elif self.path == '/source_address':
            # Report the address the client connected from
            self._send_body(200, html_type, str(self.client_address[0]).encode())
            self.finish()
        else:
            self._status(404)

    def send_header(self, keyword, value):
        """
        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
        This is against what is defined in RFC 3986, however we need to test we support this
        since some sites incorrectly do this.
        """
        if keyword.lower() == 'connection':
            return super().send_header(keyword, value)

        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []

        # str.encode() defaults to UTF-8, so non-ASCII values are buffered as-is
        self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
278
279
class TestRequestHandlerBase:
    """Base for test classes that need the local HTTP and HTTPS test servers.

    ``setup_class`` starts both servers on ephemeral ports of 127.0.0.1 and
    publishes them as ``http_httpd``/``http_port`` and
    ``https_httpd``/``https_port``; ``teardown_class`` stops them again.
    """

    @classmethod
    def setup_class(cls):
        """Start the plain HTTP and the HTTPS server, each in its own daemon thread."""
        # HTTP server
        cls.http_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        cls.http_port = http_server_port(cls.http_httpd)
        cls.http_server_thread = threading.Thread(
            target=cls.http_httpd.serve_forever, daemon=True)
        cls.http_server_thread.start()

        # HTTPS server
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        cls.https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
        cls.https_port = http_server_port(cls.https_httpd)
        cls.https_server_thread = threading.Thread(
            target=cls.https_httpd.serve_forever, daemon=True)
        cls.https_server_thread.start()

    @classmethod
    def teardown_class(cls):
        """Stop the servers started by ``setup_class`` and release their sockets.

        Servers are stopped once per test class, not after each test.
        See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
        """
        for attr in ('http_httpd', 'https_httpd'):
            httpd = getattr(cls, attr, None)
            if httpd is None:
                continue
            # shutdown() ends the serve_forever() loop; server_close() closes the listening socket
            httpd.shutdown()
            httpd.server_close()
303
304
class TestHTTPRequestHandler(TestRequestHandlerBase):
    """HTTP(S) behaviour tests, each run against both the Urllib and the Requests handler."""

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_verify_cert(self, handler):
        """Certificate verification fails by default and can be disabled with verify=False."""
        with handler() as rh:
            with pytest.raises(CertificateVerifyError):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

        with handler(verify=False) as rh:
            r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
            assert r.status == 200
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_ssl_error(self, handler):
        """A failed TLS handshake raises SSLError, not CertificateVerifyError."""
        # HTTPS server with too old TLS version
        # XXX: is there a better way to test this than to create a new server?
        # NOTE(review): no certificate chain is loaded into this context, which
        # looks like what actually triggers the handshake failure - confirm
        https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
        https_port = http_server_port(https_httpd)
        https_server_thread = threading.Thread(target=https_httpd.serve_forever, daemon=True)
        https_server_thread.start()

        try:
            with handler(verify=False) as rh:
                with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
                    validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
                assert not issubclass(exc_info.type, CertificateVerifyError)
        finally:
            # Do not leave the one-off server running after the test
            https_httpd.shutdown()
            https_httpd.server_close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_percent_encode(self, handler):
        with handler() as rh:
            # Unicode characters should be encoded with uppercase percent-encoding
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
            assert res.status == 200
            res.close()
            # don't normalize existing percent encodings
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
            assert res.status == 200
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_remove_dot_segments(self, handler):
        with handler() as rh:
            # This isn't a comprehensive test,
            # but it should be enough to check whether the handler is removing dot segments
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
            assert res.status == 200
            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
            res.close()

            # Same check for dot segments that arrive via a redirect Location
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
            assert res.status == 200
            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_unicode_path_redirection(self, handler):
        """A non-ASCII redirect Location ends up percent-encoded in the response URL."""
        with handler() as rh:
            r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
            assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_raise_http_error(self, handler):
        with handler() as rh:
            for bad_status in (400, 500, 599, 302):
                with pytest.raises(HTTPError):
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_{bad_status}'))

            # Should not raise an error
            validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')).close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_response_url(self, handler):
        with handler() as rh:
            # Response url should be that of the last url in redirect chain
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
            assert res.url == f'http://127.0.0.1:{self.http_port}/method'
            res.close()
            res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
            assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
            res2.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_redirect(self, handler):
        """Check which method and body each redirect status results in."""
        with handler() as rh:
            def do_req(redirect_status, method, assert_no_content=False):
                """Follow /redirect_<status>; return (body data echoed back, final method)."""
                data = b'testdata' if method in ('POST', 'PUT') else None
                res = validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))

                headers = b''
                data_sent = b''
                if data is not None:
                    data_sent += res.read(len(data))
                    if data_sent != data:
                        # The body was not forwarded: what was read is the start of the echoed headers
                        headers += data_sent
                        data_sent = b''

                headers += res.read()

                if assert_no_content or data is None:
                    assert b'Content-Type' not in headers
                    assert b'Content-Length' not in headers
                else:
                    assert b'Content-Type' in headers
                    assert b'Content-Length' in headers

                return data_sent.decode(), res.headers.get('method', '')

            # A 303 must either use GET or HEAD for subsequent request
            assert do_req(303, 'POST', True) == ('', 'GET')
            assert do_req(303, 'HEAD') == ('', 'HEAD')

            assert do_req(303, 'PUT', True) == ('', 'GET')

            # 301 and 302 turn POST only into a GET
            assert do_req(301, 'POST', True) == ('', 'GET')
            assert do_req(301, 'HEAD') == ('', 'HEAD')
            assert do_req(302, 'POST', True) == ('', 'GET')
            assert do_req(302, 'HEAD') == ('', 'HEAD')

            assert do_req(301, 'PUT') == ('testdata', 'PUT')
            assert do_req(302, 'PUT') == ('testdata', 'PUT')

            # 307 and 308 should not change method
            for m in ('POST', 'PUT'):
                assert do_req(307, m) == ('testdata', m)
                assert do_req(308, m) == ('testdata', m)

            assert do_req(307, 'HEAD') == ('', 'HEAD')
            assert do_req(308, 'HEAD') == ('', 'HEAD')

            # These should not redirect and instead raise an HTTPError
            for code in (300, 304, 305, 306):
                with pytest.raises(HTTPError):
                    do_req(code, 'GET')

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_request_cookie_header(self, handler):
        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
        with handler() as rh:
            # Specified Cookie header should be used
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' in res

            # Specified Cookie header should be removed on any redirect
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/308-to-headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' not in res

        # Specified Cookie header should override global cookiejar for that request
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp', port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
            path_specified=True, secure=False, expires=None, discard=False, comment=None,
            comment_url=None, rest={}))

        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read()
            assert b'Cookie: test=ytdlp' not in data
            assert b'Cookie: test=test' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_redirect_loop(self, handler):
        with handler() as rh:
            with pytest.raises(HTTPError, match='redirect loop'):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_incompleteread(self, handler):
        with handler(timeout=2) as rh:
            with pytest.raises(IncompleteRead):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_cookies(self, handler):
        """Cookies from a cookiejar are sent, whether the jar is set per handler or per request."""
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp', port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/headers',
            path_specified=True, secure=False, expires=None, discard=False, comment=None,
            comment_url=None, rest={}))

        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Cookie: test=ytdlp' in data

        # Per request
        with handler() as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
            assert b'Cookie: test=ytdlp' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_headers(self, handler):

        with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
            # Global Headers
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Test1: test' in data

            # Per request headers, merged with global
            data = validate_and_send(rh, Request(
                f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read()
            assert b'Test1: test' in data
            assert b'Test2: changed' in data
            assert b'Test2: test2' not in data
            assert b'Test3: test3' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_timeout(self, handler):
        with handler() as rh:
            # Default timeout is 20 seconds, so this should go through
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3'))

        with handler(timeout=0.5) as rh:
            with pytest.raises(TransportError):
                validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))

            # Per request timeout, should override handler timeout
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_source_address(self, handler):
        """The server must see the connection coming from the configured source address."""
        # A random 127.0.0.x address
        source_address = f'127.0.0.{random.randint(5, 255)}'
        with handler(source_address=source_address) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
            assert source_address == data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_gzip_trailing_garbage(self, handler):
        """Bytes appended after the gzip stream must not end up in the decoded body."""
        with handler() as rh:
            data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
            assert data == '<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
    def test_brotli(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'br'}))
            assert res.headers.get('Content-Encoding') == 'br'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_deflate(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'deflate'}))
            assert res.headers.get('Content-Encoding') == 'deflate'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_gzip(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'gzip'}))
            assert res.headers.get('Content-Encoding') == 'gzip'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_multiple_encodings(self, handler):
        """Stacked content encodings are all undone before the body is returned."""
        with handler() as rh:
            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
                res = validate_and_send(
                    rh, Request(
                        f'http://127.0.0.1:{self.http_port}/content-encoding',
                        headers={'ytdl-encoding': pair}))
                assert res.headers.get('Content-Encoding') == pair
                assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_unsupported_encoding(self, handler):
        """A body with an unknown Content-Encoding is returned untouched."""
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'unsupported'}))
            assert res.headers.get('Content-Encoding') == 'unsupported'
            assert res.read() == b'raw'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_read(self, handler):
        """Partial reads return consecutive slices of the body."""
        with handler() as rh:
            res = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
            assert res.readable()
            # The echoed header block is expected to start with "Host"
            assert res.read(1) == b'H'
            assert res.read(3) == b'ost'
612
613
class TestHTTPProxy(TestRequestHandlerBase):
    """Proxy selection tests run against two fake proxies labelled 'normal' and 'geo'."""

    @classmethod
    def setup_class(cls):
        """Start the base servers plus the two fake proxy servers."""
        super().setup_class()

        # HTTP Proxy server
        cls.proxy = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('normal'))
        cls.proxy_port = http_server_port(cls.proxy)
        cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever, daemon=True)
        cls.proxy_thread.start()

        # Geo proxy server
        cls.geo_proxy = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('geo'))
        cls.geo_port = http_server_port(cls.geo_proxy)
        cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever, daemon=True)
        cls.geo_proxy_thread.start()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_http_proxy(self, handler):
        """Covers the global proxy, a per-request override, and per-request disabling."""
        http_proxy = f'http://127.0.0.1:{self.proxy_port}'
        geo_proxy = f'http://127.0.0.1:{self.geo_port}'
        url = 'http://foo.com/bar'

        # Global HTTP proxy
        with handler(proxies={'http': http_proxy}) as rh:
            body = validate_and_send(rh, Request(url)).read().decode()
            assert body == f'normal: {url}'

            # Per request proxy overrides global
            body = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode()
            assert body == f'geo: {url}'

            # and setting to None disables all proxies for that request
            real_url = f'http://127.0.0.1:{self.http_port}/headers'
            direct_request = Request(real_url, proxies={'http': None})
            body = validate_and_send(rh, direct_request).read().decode()
            assert body != f'normal: {real_url}'
            assert 'Accept' in body

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_noproxy(self, handler):
        """Requests matching the per-request 'no' proxy entry reach the real server."""
        target = f'http://127.0.0.1:{self.http_port}/headers'
        with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
            # NO_PROXY
            for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
                response = validate_and_send(rh, Request(target, proxies={'no': no_proxy}))
                nop_response = response.read().decode('utf-8')
                assert 'Accept' in nop_response

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_allproxy(self, handler):
        """The 'all' proxy key applies to an http request."""
        url = 'http://foo.com/bar'
        all_proxy = f'http://127.0.0.1:{self.proxy_port}'
        with handler() as rh:
            response = validate_and_send(rh, Request(url, proxies={'all': all_proxy})).read().decode('utf-8')
            assert response == f'normal: {url}'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_http_proxy_with_idn(self, handler):
        """An internationalised host name is sent to the proxy in its IDNA form."""
        proxies = {
            'http': f'http://127.0.0.1:{self.proxy_port}',
        }
        with handler(proxies=proxies) as rh:
            url = 'http://中文.tw/'
            response = rh.send(Request(url)).read().decode()
            # b'xn--fiq228c' is '中文'.encode('idna')
            assert response == 'normal: http://xn--fiq228c.tw/'
687
688
class TestClientCertificate:
    """Client certificate tests against an HTTPS server that requires one."""

    @classmethod
    def setup_class(cls):
        """Start an HTTPS server that verifies client certificates against ``ca.crt``."""
        cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
        server_cert = os.path.join(TEST_DIR, 'testcert.pem')
        ca_cert = os.path.join(cls.certdir, 'ca.crt')

        cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)

        context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        # Connections without an acceptable client certificate are refused
        context.verify_mode = ssl.CERT_REQUIRED
        context.load_verify_locations(cafile=ca_cert)
        context.load_cert_chain(server_cert, None)
        cls.httpd.socket = context.wrap_socket(cls.httpd.socket, server_side=True)

        cls.port = http_server_port(cls.httpd)
        cls.server_thread = threading.Thread(target=cls.httpd.serve_forever, daemon=True)
        cls.server_thread.start()

    def _run_test(self, handler, **handler_kwargs):
        """Fetch ``/video.html`` with a handler built from *handler_kwargs*."""
        # Disable client-side validation of unacceptable self-signed testcert.pem
        # The test is of a check on the server side, so unaffected
        with handler(verify=False, **handler_kwargs) as rh:
            response = validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html'))
            response.read().decode()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_combined_nopass(self, handler):
        """Certificate and unencrypted key in one file."""
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_nocombined_nopass(self, handler):
        """Certificate and unencrypted key in separate files."""
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'client.key'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_combined_pass(self, handler):
        """Certificate and password-protected key in one file."""
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_nocombined_pass(self, handler):
        """Certificate and password-protected key in separate files."""
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)
743
744
class TestUrllibRequestHandler(TestRequestHandlerBase):
    """Tests that only apply to the Urllib request handler."""

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_file_urls(self, handler):
        """file:// URLs are rejected unless ``enable_file_urls`` is set."""
        # See https://github.com/ytdl-org/youtube-dl/issues/8227
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tf.write(b'foobar')

        # Remove the temporary file even when one of the checks below fails
        try:
            req = Request(pathlib.Path(tf.name).as_uri())
            with handler() as rh:
                with pytest.raises(UnsupportedRequest):
                    rh.validate(req)

                # Test that urllib never loaded FileHandler
                with pytest.raises(TransportError):
                    rh.send(req)

            with handler(enable_file_urls=True) as rh:
                res = validate_and_send(rh, req)
                assert res.read() == b'foobar'
                res.close()
        finally:
            os.unlink(tf.name)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_error_returns_content(self, handler):
        """The response attached to an HTTPError stays readable after the error is dropped."""
        # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
        def get_response():
            with handler() as rh:
                # headers url
                try:
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
                except HTTPError as e:
                    return e.response

        assert get_response().read() == b'<html></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert_error_text(self, handler):
        # Check the output of the error message
        with handler() as rh:
            with pytest.raises(
                CertificateVerifyError,
                match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
            ):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('req,match,version_check', [
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
        # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
        (
            Request('http://127.0.0.1', method='GET\n'),
            'method can\'t contain control characters',
            lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
        # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
        # NOTE(review): the whitespace inside the host below may originally have been
        # a control character escape (e.g. a carriage return) - confirm against upstream
        (
            Request('http://127.0.0. 1', method='GET'),
            'URL can\'t contain control characters',
            lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
        (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
    ])
    def test_httplib_validation_errors(self, handler, req, match, version_check):
        """Validation errors from http.client surface as RequestError, not TransportError.

        ``version_check`` receives ``sys.version_info`` and returns a truthy value
        for Python versions that lack the validation, in which case the test is skipped.
        """
        if version_check and version_check(sys.version_info):
            pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')

        with handler() as rh:
            with pytest.raises(RequestError, match=match) as exc_info:
                validate_and_send(rh, req)
            assert not isinstance(exc_info.value, TransportError)
818
819
class TestRequestsRequestHandler(TestRequestHandlerBase):
    """Checks how errors from requests/urllib3 are translated by the Requests handler."""

    # The exceptions are wrapped in lambdas so that they are only looked up and
    # constructed when a test actually runs (presumably so that collection still
    # works when the optional dependency is unavailable - confirm).
    @pytest.mark.parametrize('raised,expected', [
        (lambda: requests.exceptions.ConnectTimeout(), TransportError),
        (lambda: requests.exceptions.ReadTimeout(), TransportError),
        (lambda: requests.exceptions.Timeout(), TransportError),
        (lambda: requests.exceptions.ConnectionError(), TransportError),
        (lambda: requests.exceptions.ProxyError(), ProxyError),
        (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
        (lambda: requests.exceptions.SSLError(), SSLError),
        (lambda: requests.exceptions.InvalidURL(), RequestError),
        (lambda: requests.exceptions.InvalidHeader(), RequestError),
        # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
        (lambda: urllib3.exceptions.HTTPError(), TransportError),
        (lambda: requests.exceptions.RequestException(), RequestError)
        # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
    ])
    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
    def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
        """An exception raised while sending must come out of send() as exactly `expected`."""
        class MockSession:
            def request(self, *args, **kwargs):
                raise raised()

        with handler() as rh:
            # Make the handler use a session whose request() always fails
            monkeypatch.setattr(rh, '_get_instance', lambda *args, **kwargs: MockSession())

            with pytest.raises(expected) as exc_info:
                rh.send(Request('http://fake'))

            # pytest.raises also accepts subclasses; require the exact type
            assert exc_info.type is expected

    @pytest.mark.parametrize('raised,expected,match', [
        (lambda: urllib3.exceptions.SSLError(), SSLError, None),
        (lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
        (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
        (lambda: urllib3.exceptions.ProtocolError(), TransportError, None),
        (lambda: urllib3.exceptions.DecodeError(), TransportError, None),
        (lambda: urllib3.exceptions.HTTPError(), TransportError, None),  # catch-all
        (
            lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)),
            IncompleteRead,
            '3 bytes read, 4 more expected'
        ),
        (
            lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)),
            IncompleteRead,
            '3 bytes read, 5 more expected'
        ),
    ])
    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
    def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
        """An exception raised while reading the body must come out of read() as exactly `expected`."""
        from requests.models import Response as RequestsResponse
        from urllib3.response import HTTPResponse as Urllib3Response

        from yt_dlp.networking._requests import RequestsResponseAdapter

        wrapped = RequestsResponse()
        wrapped.raw = Urllib3Response(body=b'', status=200)
        adapter = RequestsResponseAdapter(wrapped)

        def failing_read(*args, **kwargs):
            raise raised()

        monkeypatch.setattr(adapter.fp, 'read', failing_read)

        with pytest.raises(expected, match=match) as exc_info:
            adapter.read()

        assert exc_info.type is expected
888
889
def run_validation(handler, error, req, **handler_kwargs):
    """Validate `req` with a freshly built handler.

    @param handler          Callable returning a context-managed request handler
    @param error            Falsy if validation must succeed, otherwise the
                            exception type that rh.validate() must raise
    @param req              The request passed to rh.validate()
    @param handler_kwargs   Keyword arguments forwarded to `handler`
    """
    with handler(**handler_kwargs) as rh:
        if not error:
            rh.validate(req)
            return
        with pytest.raises(error):
            rh.validate(req)
897
898
class TestRequestHandlerValidation:
    """Table-driven checks of RequestHandler.validate().

    In every table below the `fail` column is passed to run_validation() as
    `error`: a falsy value means validation must pass, otherwise it is the
    exception type that validate() must raise.
    """

    class ValidationRH(RequestHandler):
        def _send(self, request):
            raise RequestError('test')

    class NoCheckRH(ValidationRH):
        # Everything is set to None; the tests below expect such a handler to let any request through
        _SUPPORTED_FEATURES = None
        _SUPPORTED_PROXY_SCHEMES = None
        _SUPPORTED_URL_SCHEMES = None

        def _check_extensions(self, extensions):
            extensions.clear()

    class HTTPSupportedRH(ValidationRH):
        _SUPPORTED_URL_SCHEMES = ('http',)

    URL_SCHEME_TESTS = [
        # handler, [(url scheme, fail, handler kwargs), ...]
        ('Urllib', [
            ('http', False, {}),
            ('https', False, {}),
            ('data', False, {}),
            ('ftp', False, {}),
            ('file', UnsupportedRequest, {}),
            ('file', False, {'enable_file_urls': True}),
        ]),
        ('Requests', [
            ('http', False, {}),
            ('https', False, {}),
        ]),
        ('Websockets', [
            ('ws', False, {}),
            ('wss', False, {}),
        ]),
        (NoCheckRH, [('http', False, {})]),
        (ValidationRH, [('http', UnsupportedRequest, {})])
    ]

    PROXY_SCHEME_TESTS = [
        # handler, request scheme, [(proxy scheme, fail), ...]
        ('Urllib', 'http', [
            ('http', False),
            ('https', UnsupportedRequest),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
            ('socks', UnsupportedRequest),
        ]),
        ('Requests', 'http', [
            ('http', False),
            ('https', False),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
        ]),
        ('Websockets', 'ws', [('http', UnsupportedRequest)]),
        # Each of these two rows used to be listed twice, running identical cases twice
        (NoCheckRH, 'http', [('http', False)]),
        (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
    ]

    PROXY_KEY_TESTS = [
        # handler, [(proxy key, fail), ...]
        ('Urllib', [
            ('all', False),
            ('unrelated', False),
        ]),
        ('Requests', [
            ('all', False),
            ('unrelated', False),
        ]),
        (NoCheckRH, [('all', False)]),
        (HTTPSupportedRH, [('all', UnsupportedRequest)]),
        (HTTPSupportedRH, [('no', UnsupportedRequest)]),
    ]

    EXTENSION_TESTS = [
        # handler, request scheme, [(extensions, fail), ...]
        ('Urllib', 'http', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'cookiejar': CookieJar()}, AssertionError),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        ('Requests', 'http', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        (NoCheckRH, 'http', [
            ({'cookiejar': 'notacookiejar'}, False),
            ({'somerandom': 'test'}, False),  # but any extension is allowed through
        ]),
        ('Websockets', 'ws', [
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'timeout': 2}, False),
        ]),
    ]

    # NOTE: the outermost iterable of each comprehension below is evaluated in
    # class scope, which is why the class-level tables can be referenced here.
    @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
        (handler_tests[0], scheme, fail, handler_kwargs)
        for handler_tests in URL_SCHEME_TESTS
        for scheme, fail, handler_kwargs in handler_tests[1]
    ], indirect=['handler'])
    def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
        """Validate a bare `<scheme>://` URL with the given handler options."""
        run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))

    @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False)], indirect=['handler'])
    def test_no_proxy(self, handler, fail):
        """A `no` proxy entry is validated both on the request and on the handler."""
        run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
        run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})

    @pytest.mark.parametrize('handler,proxy_key,fail', [
        (handler_tests[0], proxy_key, fail)
        for handler_tests in PROXY_KEY_TESTS
        for proxy_key, fail in handler_tests[1]
    ], indirect=['handler'])
    def test_proxy_key(self, handler, proxy_key, fail):
        """Proxy keys are validated both on the request and on the handler."""
        run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
        run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})

    @pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
        (handler_tests[0], handler_tests[1], scheme, fail)
        for handler_tests in PROXY_SCHEME_TESTS
        for scheme, fail in handler_tests[2]
    ], indirect=['handler'])
    def test_proxy_scheme(self, handler, req_scheme, scheme, fail):
        """Proxy URL schemes are validated both on the request and on the handler."""
        run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
        run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})

    @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests'], indirect=True)
    def test_empty_proxy(self, handler):
        """A proxy explicitly set to None must pass validation."""
        run_validation(handler, False, Request('http://', proxies={'http': None}))
        run_validation(handler, False, Request('http://'), proxies={'http': None})

    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_invalid_proxy_url(self, handler, proxy_url):
        """Proxy URLs without a scheme must be rejected."""
        run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))

    @pytest.mark.parametrize('handler,scheme,extensions,fail', [
        (handler_tests[0], handler_tests[1], extensions, fail)
        for handler_tests in EXTENSION_TESTS
        for extensions, fail in handler_tests[2]
    ], indirect=['handler'])
    def test_extension(self, handler, scheme, extensions, fail):
        """Validate request extensions against the handler."""
        run_validation(
            handler, fail, Request(f'{scheme}://', extensions=extensions))

    def test_invalid_request_type(self):
        """Both validate() and send() must reject anything that is not a Request."""
        rh = self.ValidationRH(logger=FakeLogger())
        for method in (rh.validate, rh.send):
            with pytest.raises(TypeError, match='Expected an instance of Request'):
                method('not a request')
1061
1062
class FakeResponse(Response):
    """Empty-bodied Response that remembers the request it was created for."""

    def __init__(self, request):
        # XXX: we could make request part of standard response interface
        self.request = request
        empty_body = io.BytesIO(b'')
        super().__init__(fp=empty_body, headers={}, url=request.url)
1068
1069
class FakeRH(RequestHandler):
    """Handler stub: performs no validation and answers with a FakeResponse."""

    def _validate(self, request):
        # Deliberately performs no checks
        return None

    def _send(self, request: Request):
        ssl_prefix = 'ssl://'
        if not request.url.startswith(ssl_prefix):
            return FakeResponse(request)
        # 'ssl://<text>' URLs trigger an SSLError whose message is <text>
        raise SSLError(request.url[len(ssl_prefix):])
1079
1080
class FakeRHYDL(FakeYDL):
    """FakeYDL whose request director is built from FakeRH only."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        only_handlers = [FakeRH]
        self._request_director = self.build_request_director(only_handlers)
1085
1086
class AllUnsupportedRHYDL(FakeYDL):
    """FakeYDL whose only handler declares empty feature, proxy scheme and URL scheme lists."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        class UnsupportedRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ()
            _SUPPORTED_PROXY_SCHEMES = ()
            _SUPPORTED_FEATURES = ()

            def _send(self, request: Request):
                pass

        self._request_director = self.build_request_director([UnsupportedRH])
1101
1102
class TestRequestDirector:
    """Tests for RequestDirector: handler registry, dispatch, error handling and preferences."""

    def test_handler_operations(self):
        """Handlers are stored under their RH_KEY; re-adding the same key replaces the entry."""
        director = RequestDirector(logger=FakeLogger())
        handler = FakeRH(logger=FakeLogger())
        director.add_handler(handler)
        assert director.handlers.get(FakeRH.RH_KEY) is handler

        # Handler should overwrite
        handler2 = FakeRH(logger=FakeLogger())
        director.add_handler(handler2)
        assert director.handlers.get(FakeRH.RH_KEY) is not handler
        assert director.handlers.get(FakeRH.RH_KEY) is handler2
        assert len(director.handlers) == 1

        class AnotherFakeRH(FakeRH):
            pass
        director.add_handler(AnotherFakeRH(logger=FakeLogger()))
        assert len(director.handlers) == 2
        assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY

        director.handlers.pop(FakeRH.RH_KEY, None)
        assert director.handlers.get(FakeRH.RH_KEY) is None
        assert len(director.handlers) == 1

        # RequestErrors should passthrough
        with pytest.raises(SSLError):
            director.send(Request('ssl://something'))

    def test_send(self):
        """send() raises RequestError without handlers and returns the handler's response otherwise."""
        director = RequestDirector(logger=FakeLogger())
        with pytest.raises(RequestError):
            director.send(Request('any://'))
        director.add_handler(FakeRH(logger=FakeLogger()))
        assert isinstance(director.send(Request('http://')), FakeResponse)

    def test_unsupported_handlers(self):
        """Handlers that do not support a request are skipped in favour of ones that do."""
        class SupportedRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        director = RequestDirector(logger=FakeLogger())
        director.add_handler(SupportedRH(logger=FakeLogger()))
        director.add_handler(FakeRH(logger=FakeLogger()))

        # First should take preference
        assert director.send(Request('http://')).read() == b'supported'
        assert director.send(Request('any://')).read() == b''

        director.handlers.pop(FakeRH.RH_KEY)
        with pytest.raises(NoSupportingHandlers):
            director.send(Request('any://'))

    def test_unexpected_error(self):
        """A non-RequestError raised by a handler is counted, not propagated."""
        director = RequestDirector(logger=FakeLogger())

        class UnexpectedRH(FakeRH):
            def _send(self, request: Request):
                raise TypeError('something')

        # Pass a logger instance (not the class), consistent with every other handler in this file
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
            director.send(Request('any://'))

        director.handlers.clear()
        assert len(director.handlers) == 0

        # Should not be fatal
        director.add_handler(FakeRH(logger=FakeLogger()))
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        assert director.send(Request('any://'))

    def test_preference(self):
        """Preference functions decide which handler is used for a request."""
        director = RequestDirector(logger=FakeLogger())
        director.add_handler(FakeRH(logger=FakeLogger()))

        class SomeRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        def some_preference(rh, request):
            # Neutral for other handlers; SomeRH is favoured only when the
            # request carries a 'prefer' header and penalised otherwise.
            return (0 if not isinstance(rh, SomeRH)
                    else 100 if 'prefer' in request.headers
                    else -1)

        director.add_handler(SomeRH(logger=FakeLogger()))
        director.preferences.add(some_preference)

        assert director.send(Request('http://')).read() == b''
        assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
1197
1198
1199 # XXX: do we want to move this to test_YoutubeDL.py?
class TestYoutubeDLNetworking:
    """Tests for the networking glue on YoutubeDL: urlopen() and request director building."""

    @staticmethod
    def build_handler(ydl, handler: RequestHandler = FakeRH):
        """Build a request director containing only `handler` and return the resulting instance."""
        return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)

    @staticmethod
    def _restore_env(key, previous):
        """Put the environment variable `key` back to `previous`; None means it was not set."""
        if previous is None:
            # Remove the test value so it does not leak into later tests
            os.environ.pop(key, None)
        else:
            os.environ[key] = previous

    def test_compat_opener(self):
        """The deprecated `_opener` attribute still yields an urllib OpenerDirector."""
        with FakeYDL() as ydl:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                assert isinstance(ydl._opener, urllib.request.OpenerDirector)

    @pytest.mark.parametrize('proxy,expected', [
        ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
        ('', {'all': '__noproxy__'}),
        (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'})  # env, set https
    ])
    def test_proxy(self, proxy, expected):
        """The `proxy` param determines ydl.proxies; with None the environment is used."""
        old_http_proxy = os.environ.get('HTTP_PROXY')
        try:
            os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081'  # ensure that provided proxies override env
            with FakeYDL({'proxy': proxy}) as ydl:
                assert ydl.proxies == expected
        finally:
            self._restore_env('HTTP_PROXY', old_http_proxy)

    def test_compat_request(self):
        """urlopen() accepts plain URLs and legacy urllib.request.Request objects."""
        with FakeRHYDL() as ydl:
            assert ydl.urlopen('test://')
            urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
            urllib_req.add_unredirected_header('Cookie', 'bob=bob')
            urllib_req.timeout = 2
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                req = ydl.urlopen(urllib_req).request
                assert req.url == urllib_req.get_full_url()
                assert req.data == urllib_req.data
                assert req.method == urllib_req.get_method()
                assert 'X-Test' in req.headers
                assert 'Cookie' in req.headers
                assert req.extensions.get('timeout') == 2

            with pytest.raises(AssertionError):
                ydl.urlopen(None)

    def test_extract_basic_auth(self):
        """Credentials embedded in the URL become a Basic Authorization header."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('http://user:pass@foo.bar'))
            assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'

    def test_sanitize_url(self):
        """urlopen() sanitizes the URL before sending ('httpss://' becomes 'https://')."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('httpss://foo.bar'))
            assert res.request.url == 'https://foo.bar'

    def test_file_urls_error(self):
        """file:// URLs are rejected with an explanatory message by default."""
        # use urllib handler
        with FakeYDL() as ydl:
            with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
                ydl.urlopen('file://')

    @pytest.mark.parametrize('scheme', (['ws', 'wss']))
    def test_websocket_unavailable_error(self, scheme):
        """An unsupported ws/wss request produces a WebSocket-specific error message."""
        with AllUnsupportedRHYDL() as ydl:
            with pytest.raises(RequestError, match=r'This request requires WebSocket support'):
                ydl.urlopen(f'{scheme}://')

    def test_legacy_server_connect_error(self):
        """Known legacy-TLS failures suggest --legacy-server-connect; other SSL errors pass through."""
        with FakeRHYDL() as ydl:
            for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
                    ydl.urlopen(f'ssl://{error}')

            with pytest.raises(SSLError, match='testerror'):
                ydl.urlopen('ssl://testerror')

    @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
        ('http', '__noproxy__', None),
        ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
        ('https', 'example.com', 'http://example.com'),
        ('https', '//example.com', 'http://example.com'),
        ('https', 'socks5://example.com', 'socks5h://example.com'),
        ('http', 'socks://example.com', 'socks4://example.com'),
        ('http', 'socks4://example.com', 'socks4://example.com'),
        ('unrelated', '/bad/proxy', '/bad/proxy'),  # clean_proxies should ignore bad proxies
    ])
    def test_clean_proxy(self, proxy_key, proxy_url, expected):
        """Proxy URLs are normalised for per-request proxies and for proxies taken from the environment."""
        # proxies should be cleaned in urlopen()
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
            assert req.proxies[proxy_key] == expected

        # and should also be cleaned when building the handler
        env_key = f'{proxy_key.upper()}_PROXY'
        old_env_proxy = os.environ.get(env_key)
        try:
            os.environ[env_key] = proxy_url  # ensure that provided proxies override env
            with FakeYDL() as ydl:
                rh = self.build_handler(ydl)
                assert rh.proxies[proxy_key] == expected
        finally:
            self._restore_env(env_key, old_env_proxy)

    def test_clean_proxy_header(self):
        """The internal 'ytdl-request-proxy' header is turned into a proxy and removed."""
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
            assert 'ytdl-request-proxy' not in req.headers
            assert req.proxies == {'all': 'http://foo.bar'}

        with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'ytdl-request-proxy' not in rh.headers
            assert rh.proxies == {'all': 'http://foo.bar'}

    def test_clean_header(self):
        """Internal control headers are removed before sending and when building the handler."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
            assert 'Youtubedl-no-compression' not in res.request.headers
            assert res.request.headers.get('Accept-Encoding') == 'identity'

        with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'Youtubedl-no-compression' not in rh.headers
            assert rh.headers.get('Accept-Encoding') == 'identity'

        with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'Ytdl-socks-proxy' not in rh.headers

    def test_build_handler_params(self):
        """YoutubeDL params are mapped onto the corresponding handler attributes."""
        with FakeYDL({
            'http_headers': {'test': 'testtest'},
            'socket_timeout': 2,
            'proxy': 'http://127.0.0.1:8080',
            'source_address': '127.0.0.45',
            'debug_printtraffic': True,
            'compat_opts': ['no-certifi'],
            'nocheckcertificate': True,
            'legacyserverconnect': True,
        }) as ydl:
            rh = self.build_handler(ydl)
            assert rh.headers.get('test') == 'testtest'
            assert 'Accept' in rh.headers  # ensure std_headers are still there
            assert rh.timeout == 2
            assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
            assert rh.source_address == '127.0.0.45'
            assert rh.verbose is True
            assert rh.prefer_system_certs is True
            assert rh.verify is False
            assert rh.legacy_ssl_support is True

    @pytest.mark.parametrize('ydl_params', [
        {'client_certificate': 'fakecert.crt'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
        {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
    ])
    def test_client_certificate(self, ydl_params):
        """Client certificate params are handed to the handler unchanged."""
        with FakeYDL(ydl_params) as ydl:
            rh = self.build_handler(ydl)
            assert rh._client_cert == ydl_params  # XXX: Too bound to implementation

    def test_urllib_file_urls(self):
        """The `enable_file_urls` param is forwarded to the urllib handler."""
        with FakeYDL({'enable_file_urls': False}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is False

        with FakeYDL({'enable_file_urls': True}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is True

    def test_compat_opt_prefer_urllib(self):
        """The 'prefer-legacy-http-handler' compat option registers a preference favouring urllib."""
        # This assumes urllib only has a preference when this compat opt is given
        with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
            director = ydl.build_request_director([UrllibRH])
            assert len(director.preferences) == 1
            assert director.preferences.pop()(UrllibRH, None)
1379
1380
class TestRequest:
    """Tests for the Request object: attribute handling, derived headers and copying."""

    def test_query(self):
        """`query` is merged into the URL; keys already present are replaced."""
        req = Request('http://example.com?q=something', query={'v': 'xyz'})
        assert req.url == 'http://example.com?q=something&v=xyz'

        req.update(query={'v': '123'})
        assert req.url == 'http://example.com?q=something&v=123'

        req.update(url='http://example.com', query={'v': 'xyz'})
        assert req.url == 'http://example.com?v=xyz'

    def test_method(self):
        """Without an explicit method it follows the presence of data; an explicit method sticks."""
        req = Request('http://example.com')
        assert req.method == 'GET'

        req.data = b'test'
        assert req.method == 'POST'
        req.data = None
        assert req.method == 'GET'

        req.data = b'test2'
        req.method = 'PUT'
        assert req.method == 'PUT'
        req.data = None
        assert req.method == 'PUT'

        with pytest.raises(TypeError):
            req.method = 1

    def test_request_helpers(self):
        """HEADRequest and PUTRequest preset the method."""
        assert HEADRequest('http://example.com').method == 'HEAD'
        assert PUTRequest('http://example.com').method == 'PUT'

    def test_headers(self):
        """Headers are kept in an HTTPHeaderDict and compare case-insensitively."""
        req = Request('http://example.com', headers={'tesT': 'test'})
        assert req.headers == HTTPHeaderDict({'test': 'test'})

        req.update(headers={'teSt2': 'test2'})
        assert req.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})

        replacement = HTTPHeaderDict({'test': 'test'})
        req.headers = replacement
        assert req.headers == HTTPHeaderDict({'test': 'test'})
        assert req.headers is replacement

        # test converts dict to case insensitive dict
        plain_dict = {'test2': 'test2'}
        req.headers = plain_dict
        assert isinstance(req.headers, HTTPHeaderDict)
        assert req.headers is not plain_dict

        with pytest.raises(TypeError):
            req.headers = None

    def test_data_type(self):
        """Only None, bytes, iterables of bytes and file-like objects are accepted as data."""
        req = Request('http://example.com')
        assert req.data is None

        # test bytes is allowed
        req.data = b'test'
        assert req.data == b'test'

        # test iterable of bytes is allowed
        chunks = [b'test', b'test2']
        req.data = chunks
        assert req.data == chunks

        # test file-like object is allowed
        stream = io.BytesIO(b'test')
        req.data = stream
        assert req.data == stream

        # common mistake: test str not allowed
        with pytest.raises(TypeError):
            req.data = 'test'
        assert req.data != 'test'

        # common mistake: test dict is not allowed
        with pytest.raises(TypeError):
            req.data = {'test': 'test'}
        assert req.data != {'test': 'test'}

    def test_content_length_header(self):
        """Content-Length is dropped when data changes or when there is no data."""
        req = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
        assert req.headers.get('Content-Length') == '0'

        req.data = b'test'
        assert 'Content-Length' not in req.headers

        req = Request('http://example.com', headers={'Content-Length': '10'})
        assert 'Content-Length' not in req.headers

    def test_content_type_header(self):
        """Content-Type survives data changes, is dropped with the data, and gets a default."""
        req = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
        assert req.headers.get('Content-Type') == 'test'

        req.data = b'test2'
        assert req.headers.get('Content-Type') == 'test'

        req.data = None
        assert 'Content-Type' not in req.headers

        req.data = b'test3'
        assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_update_req(self):
        """update(data=b'') still counts as having data."""
        req = Request('http://example.com')
        assert req.data is None
        assert req.method == 'GET'
        assert 'Content-Type' not in req.headers

        # Test that zero-byte payloads will be sent
        req.update(data=b'')
        assert req.data == b''
        assert req.method == 'POST'
        assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_proxies(self):
        """Proxies given to the constructor are exposed unchanged."""
        proxy_map = {'http': 'http://127.0.0.1:8080'}
        req = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'})
        assert req.proxies == proxy_map

    def test_extensions(self):
        """Extensions given to the constructor are exposed unchanged."""
        req = Request(url='http://example.com', extensions={'timeout': 2})
        assert req.extensions == {'timeout': 2}

    def test_copy(self):
        """copy() duplicates the headers, proxies and extensions containers but shares the data."""
        req = Request(
            url='http://example.com',
            extensions={'cookiejar': CookieJar()},
            headers={'Accept-Encoding': 'br'},
            proxies={'http': 'http://127.0.0.1'},
            data=[b'123']
        )
        duplicate = req.copy()
        assert duplicate is not req
        assert duplicate.url == req.url
        assert duplicate.headers == req.headers
        assert duplicate.headers is not req.headers
        assert duplicate.proxies == req.proxies
        assert duplicate.proxies is not req.proxies

        # Data is not able to be copied
        assert duplicate.data == req.data
        assert duplicate.data is req.data

        # Shallow copy extensions
        assert duplicate.extensions is not req.extensions
        assert duplicate.extensions['cookiejar'] == req.extensions['cookiejar']

        # Subclasses are copied by default
        class AnotherRequest(Request):
            pass

        subclassed = AnotherRequest(url='http://127.0.0.1')
        assert isinstance(subclassed.copy(), AnotherRequest)

    def test_url(self):
        """URLs are normalised on assignment and must be strings."""
        req = Request(url='https://фtest.example.com/ some spaceв?ä=c',)
        assert req.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'

        assert Request(url='//example.com').url == 'http://example.com'

        with pytest.raises(TypeError):
            Request(url='https://').url = None
1533
1534
class TestResponse:
    """Tests for the Response object: reason fallback, header access and deprecated aliases."""

    @pytest.mark.parametrize('reason,status,expected', [
        ('custom', 200, 'custom'),
        (None, 404, 'Not Found'),  # fallback status
        ('', 403, 'Forbidden'),
        (None, 999, None)
    ])
    def test_reason(self, reason, status, expected):
        """A missing or empty reason falls back to the status code's phrase, if there is one."""
        response = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
        assert response.reason == expected

    def test_headers(self):
        """Repeated headers are all kept and lookups ignore case."""
        message = Message()
        for name, value in (('Test', 'test'), ('Test', 'test2'), ('content-encoding', 'br')):
            message.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=message, url='test://')
        assert response.headers.get_all('test') == ['test', 'test2']
        assert 'Content-Encoding' in response.headers

    def test_get_header(self):
        """get_header() joins repeated values, except Set-Cookie where only the first is returned."""
        message = Message()
        for name, value in (
            ('Set-Cookie', 'cookie1'),
            ('Set-cookie', 'cookie2'),
            ('Test', 'test'),
            ('Test', 'test2'),
        ):
            message.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=message, url='test://')
        assert response.get_header('test') == 'test, test2'
        assert response.get_header('set-Cookie') == 'cookie1'
        assert response.get_header('notexist', 'default') == 'default'

    def test_compat(self):
        """The deprecated urllib-style accessors agree with the current attributes."""
        response = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=DeprecationWarning)
            assert response.code == response.getcode() == response.status
            assert response.geturl() == response.url
            assert response.info() is response.headers
            assert response.getheader('test') == response.get_header('test')