]> jfr.im git - yt-dlp.git/blob - test/test_networking.py
[cleanup] Misc (#8968)
[yt-dlp.git] / test / test_networking.py
1 #!/usr/bin/env python3
2
3 # Allow direct execution
4 import os
5 import sys
6
7 import pytest
8
9 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
11 import gzip
12 import http.client
13 import http.cookiejar
14 import http.server
15 import io
16 import logging
17 import pathlib
18 import random
19 import ssl
20 import tempfile
21 import threading
22 import time
23 import urllib.error
24 import urllib.request
25 import warnings
26 import zlib
27 from email.message import Message
28 from http.cookiejar import CookieJar
29
30 from test.helper import FakeYDL, http_server_port, verify_address_availability
31 from yt_dlp.cookies import YoutubeDLCookieJar
32 from yt_dlp.dependencies import brotli, requests, urllib3
33 from yt_dlp.networking import (
34 HEADRequest,
35 PUTRequest,
36 Request,
37 RequestDirector,
38 RequestHandler,
39 Response,
40 )
41 from yt_dlp.networking._urllib import UrllibRH
42 from yt_dlp.networking.exceptions import (
43 CertificateVerifyError,
44 HTTPError,
45 IncompleteRead,
46 NoSupportingHandlers,
47 ProxyError,
48 RequestError,
49 SSLError,
50 TransportError,
51 UnsupportedRequest,
52 )
53 from yt_dlp.utils._utils import _YDLLogger as FakeLogger
54 from yt_dlp.utils.networking import HTTPHeaderDict
55
56 from test.conftest import validate_and_send
57
58 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
59
60
def _build_proxy_handler(name):
    """Create a request handler class for a fake HTTP proxy labelled ``name``.

    Every GET request is answered with a 200 response whose plain-text body is
    ``<name>: <request path>``, so a test can tell which proxy (if any) handled the request.
    """
    class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
        # Label echoed back at the start of every response body
        proxy_name = name

        def log_message(self, format, *args):
            """Discard the default per-request log output."""

        def do_GET(self):
            body = f'{self.proxy_name}: {self.path}'.encode()
            self.send_response(200)
            self.send_header('Content-Type', 'text/plain; charset=utf-8')
            self.end_headers()
            self.wfile.write(body)

    return HTTPTestRequestHandler
74
75
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    """Request handler used by the local HTTP/HTTPS test servers.

    Each path exercised by the tests maps to a canned response (redirects, header echo,
    compressed bodies, truncated bodies, delays, ...). Unknown paths get a 404.
    """
    protocol_version = 'HTTP/1.1'

    # Sample HTML page returned by several endpoints
    _HTML_PAYLOAD = b'<html><video src="/vid.mp4" /></html>'
    _HTML_HEADERS = (('Content-Type', 'text/html; charset=utf-8'),)

    def log_message(self, format, *args):
        """Discard the default per-request log output."""

    def _respond(self, status, payload=b'', headers=()):
        """Send a complete response.

        Emits the status line, then each ``(keyword, value)`` pair of ``headers`` in order,
        then a ``Content-Length`` matching ``payload``, and finally ``payload`` itself.
        """
        self.send_response(status)
        for keyword, value in headers:
            self.send_header(keyword, value)
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    @staticmethod
    def _gzip(payload):
        """Return ``payload`` compressed as a gzip stream."""
        buf = io.BytesIO()
        with gzip.GzipFile(fileobj=buf, mode='wb') as f:
            f.write(payload)
        return buf.getvalue()

    def _headers(self):
        """Echo the request headers back as the response body."""
        self._respond(200, str(self.headers).encode(), [('Content-Type', 'application/json')])

    def _redirect(self):
        """Redirect to /method using the status code embedded in the path (``/redirect_<code>``)."""
        self._respond(int(self.path[len('/redirect_'):]), headers=[('Location', '/method')])

    def _method(self, method, payload=None):
        """Report ``method`` in a ``Method`` response header and send ``payload`` (if any) as the body."""
        self.send_response(200)
        self.send_header('Content-Length', str(len(payload or '')))
        self.send_header('Method', method)
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def _status(self, status):
        """Send a small HTML error page with the given status code."""
        payload = f'<html>{status} NOT FOUND</html>'.encode()
        self._respond(int(status), payload, self._HTML_HEADERS)

    def _read_data(self):
        """Read the request body as announced by ``Content-Length``.

        Returns empty bytes when the request has no ``Content-Length`` header, so callers
        can always concatenate the result with other bytes.
        """
        if 'Content-Length' in self.headers:
            return self.rfile.read(int(self.headers['Content-Length']))
        return b''

    def do_POST(self):
        # The body for /method is the request data followed by the request headers
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('POST', data)
        elif self.path.startswith('/headers'):
            self._headers()
        else:
            self._status(404)

    def do_HEAD(self):
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('HEAD')
        else:
            self._status(404)

    def do_PUT(self):
        # The body for /method is the request data followed by the request headers
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('PUT', data)
        else:
            self._status(404)

    def do_GET(self):
        if self.path in ('/video.html', '/%E4%B8%AD%E6%96%87.html', '/%c7%9f'):
            self._respond(200, self._HTML_PAYLOAD, self._HTML_HEADERS)
        elif self.path == '/vid.mp4':
            self._respond(200, b'\x00\x00\x00\x00\x20\x66\x74[video]', [('Content-Type', 'video/mp4')])
        elif self.path.startswith('/redirect_loop'):
            # Redirects to itself forever
            self._respond(301, headers=[('Location', self.path)])
        elif self.path == '/redirect_dotsegments':
            # redirect to /headers but with dot segments before
            self._respond(301, headers=[('Location', '/a/b/./../../headers')])
        elif self.path == '/redirect_dotsegments_absolute':
            # redirect to /headers but with dot segments before - absolute url
            self._respond(301, headers=[
                ('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')])
        elif self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('GET', str(self.headers).encode())
        elif self.path.startswith('/headers'):
            self._headers()
        elif self.path.startswith('/308-to-headers'):
            self._respond(308, headers=[('Location', '/headers')])
        elif self.path == '/trailing_garbage':
            # Valid gzip stream followed by bytes that are not part of it
            compressed = self._gzip(self._HTML_PAYLOAD) + b'trailing garbage'
            self._respond(200, compressed, [*self._HTML_HEADERS, ('Content-Encoding', 'gzip')])
        elif self.path == '/302-non-ascii-redirect':
            # The Location header carries raw (non percent-encoded) non-ASCII characters
            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
            self._respond(301, headers=[('Location', new_url)])
        elif self.path == '/content-encoding':
            # The 'ytdl-encoding' request header lists the encodings to apply, in order
            encodings = self.headers.get('ytdl-encoding', '')
            payload = self._HTML_PAYLOAD
            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
                if encoding == 'br' and brotli:
                    payload = brotli.compress(payload)
                elif encoding == 'gzip':
                    payload = self._gzip(payload)
                elif encoding == 'deflate':
                    payload = zlib.compress(payload)
                elif encoding == 'unsupported':
                    payload = b'raw'
                    break
                else:
                    self._status(415)
                    return
            self._respond(200, payload, [('Content-Encoding', encodings)])
        elif self.path.startswith('/gen_'):
            # Respond with the status code embedded in the path (``/gen_<code>``)
            self._respond(int(self.path[len('/gen_'):]), b'<html></html>', self._HTML_HEADERS)
        elif self.path.startswith('/incompleteread'):
            payload = b'<html></html>'
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            # Deliberately announce far more data than is actually written
            self.send_header('Content-Length', '234234')
            self.end_headers()
            self.wfile.write(payload)
            self.finish()
        elif self.path.startswith('/timeout_'):
            # Sleep for the number of seconds embedded in the path before answering
            time.sleep(int(self.path[len('/timeout_'):]))
            self._headers()
        elif self.path == '/source_address':
            # Report the address the client connected from
            self._respond(200, str(self.client_address[0]).encode(), self._HTML_HEADERS)
            self.finish()
        else:
            self._status(404)

    def send_header(self, keyword, value):
        """
        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
        This is against what is defined in RFC 3986, however we need to test we support this
        since some sites incorrectly do this.
        """
        # Let the base class handle 'Connection' itself
        if keyword.lower() == 'connection':
            return super().send_header(keyword, value)

        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []

        self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
285
286
class TestRequestHandlerBase:
    """Base test class that runs local HTTP and HTTPS servers for the duration of the class.

    The servers listen on an ephemeral port of 127.0.0.1; the ports are exposed as
    ``http_port`` and ``https_port``.
    """

    @classmethod
    def setup_class(cls):
        cls.http_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        cls.http_port = http_server_port(cls.http_httpd)
        cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever)
        # FIXME: we should probably stop the http server thread after each test
        # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
        cls.http_server_thread.daemon = True
        cls.http_server_thread.start()

        # HTTPS server
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        cls.https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
        cls.https_port = http_server_port(cls.https_httpd)
        cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever)
        cls.https_server_thread.daemon = True
        cls.https_server_thread.start()

    @classmethod
    def teardown_class(cls):
        """Stop the servers started by setup_class and release their listening sockets."""
        for server_attr, thread_attr in (
            ('http_httpd', 'http_server_thread'),
            ('https_httpd', 'https_server_thread'),
        ):
            httpd = getattr(cls, server_attr, None)
            thread = getattr(cls, thread_attr, None)
            if httpd is None:
                continue
            # shutdown() blocks until serve_forever() returns, so only call it
            # when the serving thread is actually running
            if thread is not None and thread.is_alive():
                httpd.shutdown()
            httpd.server_close()
310
311
class TestHTTPRequestHandler(TestRequestHandlerBase):
    """HTTP(S) behaviour tests run against both the Urllib and Requests request handlers."""

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_verify_cert(self, handler):
        with handler() as rh:
            with pytest.raises(CertificateVerifyError):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

        with handler(verify=False) as rh:
            r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
            assert r.status == 200
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_ssl_error(self, handler):
        # HTTPS server with too old TLS version
        # XXX: is there a better way to test this than to create a new server?
        # NOTE(review): no certificate is loaded into this context, which is presumably
        # what actually makes the handshake fail - confirm
        https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
        https_port = http_server_port(https_httpd)
        https_server_thread = threading.Thread(target=https_httpd.serve_forever)
        https_server_thread.daemon = True
        https_server_thread.start()

        try:
            with handler(verify=False) as rh:
                with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
                    validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
                assert not issubclass(exc_info.type, CertificateVerifyError)
        finally:
            # Do not leave the ad-hoc server and its socket behind for the rest of the run
            https_httpd.shutdown()
            https_httpd.server_close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_percent_encode(self, handler):
        with handler() as rh:
            # Unicode characters should be encoded with uppercase percent-encoding
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
            assert res.status == 200
            res.close()
            # don't normalize existing percent encodings
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
            assert res.status == 200
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    @pytest.mark.parametrize('path', [
        '/a/b/./../../headers',
        '/redirect_dotsegments',
        # https://github.com/yt-dlp/yt-dlp/issues/9020
        '/redirect_dotsegments_absolute',
    ])
    def test_remove_dot_segments(self, handler, path):
        with handler(verbose=True) as rh:
            # This isn't a comprehensive test,
            # but it should be enough to check whether the handler is removing dot segments in required scenarios
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
            assert res.status == 200
            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_unicode_path_redirection(self, handler):
        with handler() as rh:
            r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
            assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_raise_http_error(self, handler):
        with handler() as rh:
            for bad_status in (400, 500, 599, 302):
                with pytest.raises(HTTPError):
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_{bad_status}'))

            # Should not raise an error
            validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')).close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_response_url(self, handler):
        with handler() as rh:
            # Response url should be that of the last url in redirect chain
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
            assert res.url == f'http://127.0.0.1:{self.http_port}/method'
            res.close()
            res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
            assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
            res2.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_redirect(self, handler):
        with handler() as rh:
            def do_req(redirect_status, method, assert_no_content=False):
                """Follow /redirect_<status> and return (request data echoed back, final method)."""
                data = b'testdata' if method in ('POST', 'PUT') else None
                res = validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))

                # /method answers with the request data (if it was resent) followed by
                # the request headers; split the two apart again
                headers = b''
                data_sent = b''
                if data is not None:
                    data_sent += res.read(len(data))
                    if data_sent != data:
                        headers += data_sent
                        data_sent = b''

                headers += res.read()

                if assert_no_content or data is None:
                    assert b'Content-Type' not in headers
                    assert b'Content-Length' not in headers
                else:
                    assert b'Content-Type' in headers
                    assert b'Content-Length' in headers

                return data_sent.decode(), res.headers.get('method', '')

            # A 303 must either use GET or HEAD for subsequent request
            assert do_req(303, 'POST', True) == ('', 'GET')
            assert do_req(303, 'HEAD') == ('', 'HEAD')

            assert do_req(303, 'PUT', True) == ('', 'GET')

            # 301 and 302 turn POST only into a GET
            assert do_req(301, 'POST', True) == ('', 'GET')
            assert do_req(301, 'HEAD') == ('', 'HEAD')
            assert do_req(302, 'POST', True) == ('', 'GET')
            assert do_req(302, 'HEAD') == ('', 'HEAD')

            assert do_req(301, 'PUT') == ('testdata', 'PUT')
            assert do_req(302, 'PUT') == ('testdata', 'PUT')

            # 307 and 308 should not change method
            for m in ('POST', 'PUT'):
                assert do_req(307, m) == ('testdata', m)
                assert do_req(308, m) == ('testdata', m)

            assert do_req(307, 'HEAD') == ('', 'HEAD')
            assert do_req(308, 'HEAD') == ('', 'HEAD')

            # These should not redirect and instead raise an HTTPError
            for code in (300, 304, 305, 306):
                with pytest.raises(HTTPError):
                    do_req(code, 'GET')

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_request_cookie_header(self, handler):
        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
        with handler() as rh:
            # Specified Cookie header should be used
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' in res

            # Specified Cookie header should be removed on any redirect
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/308-to-headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' not in res

        # Specified Cookie header should override global cookiejar for that request
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp', port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
            path_specified=True, secure=False, expires=None, discard=False, comment=None,
            comment_url=None, rest={}))

        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read()
            assert b'Cookie: test=ytdlp' not in data
            assert b'Cookie: test=test' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_redirect_loop(self, handler):
        with handler() as rh:
            with pytest.raises(HTTPError, match='redirect loop'):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_incompleteread(self, handler):
        with handler(timeout=2) as rh:
            with pytest.raises(IncompleteRead):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_cookies(self, handler):
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
            False, '/headers', True, False, None, False, None, None, {}))

        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Cookie: test=ytdlp' in data

        # Per request
        with handler() as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
            assert b'Cookie: test=ytdlp' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_headers(self, handler):

        with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
            # Global Headers
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Test1: test' in data

            # Per request headers, merged with global
            data = validate_and_send(rh, Request(
                f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read()
            assert b'Test1: test' in data
            assert b'Test2: changed' in data
            assert b'Test2: test2' not in data
            assert b'Test3: test3' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_timeout(self, handler):
        with handler() as rh:
            # Default timeout is 20 seconds, so this should go through
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3'))

        with handler(timeout=0.5) as rh:
            with pytest.raises(TransportError):
                validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))

            # Per request timeout, should override handler timeout
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_source_address(self, handler):
        source_address = f'127.0.0.{random.randint(5, 255)}'
        # on some systems these loopback addresses we need for testing may not be available
        # see: https://github.com/yt-dlp/yt-dlp/issues/8890
        verify_address_availability(source_address)
        with handler(source_address=source_address) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
            assert source_address == data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_gzip_trailing_garbage(self, handler):
        with handler() as rh:
            data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
            assert data == '<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
    def test_brotli(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'br'}))
            assert res.headers.get('Content-Encoding') == 'br'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_deflate(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'deflate'}))
            assert res.headers.get('Content-Encoding') == 'deflate'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_gzip(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'gzip'}))
            assert res.headers.get('Content-Encoding') == 'gzip'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_multiple_encodings(self, handler):
        with handler() as rh:
            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
                res = validate_and_send(
                    rh, Request(
                        f'http://127.0.0.1:{self.http_port}/content-encoding',
                        headers={'ytdl-encoding': pair}))
                assert res.headers.get('Content-Encoding') == pair
                assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_unsupported_encoding(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'unsupported'}))
            assert res.headers.get('Content-Encoding') == 'unsupported'
            assert res.read() == b'raw'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_read(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
            assert res.readable()
            # /headers echoes the request headers; the assertions expect 'Host' to come first
            assert res.read(1) == b'H'
            assert res.read(3) == b'ost'
623
624
class TestHTTPProxy(TestRequestHandlerBase):
    """Proxy selection tests, run against two fake proxies labelled 'normal' and 'geo'."""

    @classmethod
    def setup_class(cls):
        super().setup_class()

        def start_proxy(label):
            """Start a fake proxy in a daemon thread; return (server, port, thread)."""
            server = http.server.ThreadingHTTPServer(
                ('127.0.0.1', 0), _build_proxy_handler(label))
            port = http_server_port(server)
            thread = threading.Thread(target=server.serve_forever, daemon=True)
            thread.start()
            return server, port, thread

        # HTTP Proxy server
        cls.proxy, cls.proxy_port, cls.proxy_thread = start_proxy('normal')
        # Geo proxy server
        cls.geo_proxy, cls.geo_port, cls.geo_proxy_thread = start_proxy('geo')

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_http_proxy(self, handler):
        # Covers: global http proxy, per request http proxy,
        # and a per request setting that disables the proxy
        normal_proxy_url = f'http://127.0.0.1:{self.proxy_port}'
        geo_proxy_url = f'http://127.0.0.1:{self.geo_port}'
        url = 'http://foo.com/bar'

        with handler(proxies={'http': normal_proxy_url}) as rh:
            # Global HTTP proxy
            body = validate_and_send(rh, Request(url)).read().decode()
            assert body == f'normal: {url}'

            # Per request proxy overrides global
            body = validate_and_send(rh, Request(url, proxies={'http': geo_proxy_url})).read().decode()
            assert body == f'geo: {url}'

            # and setting to None disables all proxies for that request
            real_url = f'http://127.0.0.1:{self.http_port}/headers'
            direct_request = Request(real_url, proxies={'http': None})
            body = validate_and_send(rh, direct_request).read().decode()
            assert body != f'normal: {real_url}'
            assert 'Accept' in body

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_noproxy(self, handler):
        # NOTE(review): the other tests here configure proxies under 'http', 'all' or 'no';
        # 'proxy' presumably never matches a request scheme - confirm this key is intended
        with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
            # NO_PROXY
            target = f'http://127.0.0.1:{self.http_port}/headers'
            for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
                response = validate_and_send(rh, Request(target, proxies={'no': no_proxy}))
                nop_response = response.read().decode('utf-8')
                assert 'Accept' in nop_response

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_allproxy(self, handler):
        url = 'http://foo.com/bar'
        all_proxies = {'all': f'http://127.0.0.1:{self.proxy_port}'}
        with handler() as rh:
            response = validate_and_send(rh, Request(url, proxies=all_proxies))
            assert response.read().decode('utf-8') == f'normal: {url}'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_http_proxy_with_idn(self, handler):
        proxies = {'http': f'http://127.0.0.1:{self.proxy_port}'}
        with handler(proxies=proxies) as rh:
            url = 'http://中文.tw/'
            body = rh.send(Request(url)).read().decode()
            # b'xn--fiq228c' is '中文'.encode('idna')
            assert body == 'normal: http://xn--fiq228c.tw/'
698
699
class TestClientCertificate:
    """Client certificate tests against an HTTPS server that requires a client certificate."""

    @classmethod
    def setup_class(cls):
        cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
        server_cert = os.path.join(TEST_DIR, 'testcert.pem')
        ca_cert = os.path.join(cls.certdir, 'ca.crt')

        cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)

        # Client certificates are mandatory and are verified against ca.crt
        tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        tls_context.verify_mode = ssl.CERT_REQUIRED
        tls_context.load_verify_locations(cafile=ca_cert)
        tls_context.load_cert_chain(server_cert, None)
        cls.httpd.socket = tls_context.wrap_socket(cls.httpd.socket, server_side=True)

        cls.port = http_server_port(cls.httpd)
        cls.server_thread = threading.Thread(target=cls.httpd.serve_forever, daemon=True)
        cls.server_thread.start()

    def _cert(self, filename):
        """Return the full path of a file in the test certificate directory."""
        return os.path.join(self.certdir, filename)

    def _run_test(self, handler, **handler_kwargs):
        """Fetch /video.html from the server using a handler built with ``handler_kwargs``."""
        # Disable client-side validation of unacceptable self-signed testcert.pem
        # The test is of a check on the server side, so unaffected
        with handler(verify=False, **handler_kwargs) as rh:
            response = validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html'))
            response.read().decode()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_combined_nopass(self, handler):
        client_cert = {'client_certificate': self._cert('clientwithkey.crt')}
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_nocombined_nopass(self, handler):
        client_cert = {
            'client_certificate': self._cert('client.crt'),
            'client_certificate_key': self._cert('client.key'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_combined_pass(self, handler):
        client_cert = {
            'client_certificate': self._cert('clientwithencryptedkey.crt'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_nocombined_pass(self, handler):
        client_cert = {
            'client_certificate': self._cert('client.crt'),
            'client_certificate_key': self._cert('clientencrypted.key'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)
754
755
class TestRequestHandlerMisc:
    """Misc generic tests for request handlers, not related to request or validation testing"""
    @pytest.mark.parametrize('handler,logger_name', [
        ('Requests', 'urllib3'),
        ('Websockets', 'websockets.client'),
        ('Websockets', 'websockets.server')
    ], indirect=['handler'])
    def test_remove_logging_handler(self, handler, logger_name):
        # Ensure any logging handlers, which may contain a YoutubeDL instance,
        # are removed when we close the request handler
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
        logging_handlers = logging.getLogger(logger_name).handlers
        before_count = len(logging_handlers)
        rh = handler()
        try:
            assert len(logging_handlers) == before_count + 1
        finally:
            # Always close, so a failed assertion above does not leave the
            # request handler open for later tests
            rh.close()
        assert len(logging_handlers) == before_count
773
774
class TestUrllibRequestHandler(TestRequestHandlerBase):
    """Tests that only apply to the Urllib request handler."""

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_file_urls(self, handler):
        # See https://github.com/ytdl-org/youtube-dl/issues/8227
        tf = tempfile.NamedTemporaryFile(delete=False)
        tf.write(b'foobar')
        tf.close()
        try:
            req = Request(pathlib.Path(tf.name).as_uri())
            with handler() as rh:
                with pytest.raises(UnsupportedRequest):
                    rh.validate(req)

                # Test that urllib never loaded FileHandler
                with pytest.raises(TransportError):
                    rh.send(req)

            with handler(enable_file_urls=True) as rh:
                res = validate_and_send(rh, req)
                assert res.read() == b'foobar'
                res.close()
        finally:
            # Remove the temporary file even when an assertion above fails
            os.unlink(tf.name)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_error_returns_content(self, handler):
        # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
        def get_response():
            """Return the response attached to the HTTPError raised for /gen_404."""
            with handler() as rh:
                # headers url
                try:
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
                except HTTPError as e:
                    return e.response

        assert get_response().read() == b'<html></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert_error_text(self, handler):
        # Check the output of the error message
        with handler() as rh:
            with pytest.raises(
                CertificateVerifyError,
                match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
            ):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('req,match,version_check', [
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
        # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
        (
            Request('http://127.0.0.1', method='GET\n'),
            'method can\'t contain control characters',
            lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
        # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
        (
            Request('http://127.0.0. 1', method='GET'),
            'URL can\'t contain control characters',
            lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
        (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
    ])
    def test_httplib_validation_errors(self, handler, req, match, version_check):
        # version_check returns True for Python versions that lack the validation under test
        if version_check and version_check(sys.version_info):
            pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')

        with handler() as rh:
            with pytest.raises(RequestError, match=match) as exc_info:
                validate_and_send(rh, req)
            # Must be reported as a plain RequestError, not as a transport failure
            assert not isinstance(exc_info.value, TransportError)
848
849
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
class TestRequestsRequestHandler(TestRequestHandlerBase):
    """Tests that only apply to the requests-based request handler."""

    @pytest.mark.parametrize('raised,expected', [
        (lambda: requests.exceptions.ConnectTimeout(), TransportError),
        (lambda: requests.exceptions.ReadTimeout(), TransportError),
        (lambda: requests.exceptions.Timeout(), TransportError),
        (lambda: requests.exceptions.ConnectionError(), TransportError),
        (lambda: requests.exceptions.ProxyError(), ProxyError),
        (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
        (lambda: requests.exceptions.SSLError(), SSLError),
        (lambda: requests.exceptions.InvalidURL(), RequestError),
        (lambda: requests.exceptions.InvalidHeader(), RequestError),
        # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
        (lambda: urllib3.exceptions.HTTPError(), TransportError),
        (lambda: requests.exceptions.RequestException(), RequestError)
        # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
    ])
    def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
        """Exceptions raised by the session while sending are translated to the expected type."""

        class RaisingSession:
            # Stand-in session whose request() always raises the parametrized error
            def request(self, *args, **kwargs):
                raise raised()

        def fake_get_instance(*args, **kwargs):
            return RaisingSession()

        with handler() as rh:
            monkeypatch.setattr(rh, '_get_instance', fake_get_instance)

            with pytest.raises(expected) as exc_info:
                rh.send(Request('http://fake'))

            # Exact type match, not merely a subclass
            assert exc_info.type is expected

    @pytest.mark.parametrize('raised,expected,match', [
        (lambda: urllib3.exceptions.SSLError(), SSLError, None),
        (lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
        (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
        (lambda: urllib3.exceptions.ProtocolError(), TransportError, None),
        (lambda: urllib3.exceptions.DecodeError(), TransportError, None),
        (lambda: urllib3.exceptions.HTTPError(), TransportError, None),  # catch-all
        (
            lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)),
            IncompleteRead,
            '3 bytes read, 4 more expected'
        ),
        (
            lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)),
            IncompleteRead,
            '3 bytes read, 5 more expected'
        ),
    ])
    def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
        """Exceptions raised while reading the response body are translated to the expected type."""
        from requests.models import Response as RequestsResponse
        from urllib3.response import HTTPResponse as Urllib3Response

        from yt_dlp.networking._requests import RequestsResponseAdapter

        def failing_read(*args, **kwargs):
            raise raised()

        wrapped = RequestsResponse()
        wrapped.raw = Urllib3Response(body=b'', status=200)
        adapter = RequestsResponseAdapter(wrapped)
        monkeypatch.setattr(adapter.fp, 'read', failing_read)

        with pytest.raises(expected, match=match) as exc_info:
            adapter.read()

        assert exc_info.type is expected

    def test_close(self, handler, monkeypatch):
        """Closing the handler closes the session it handed out."""
        rh = handler()
        session = rh._get_instance(cookiejar=rh.cookiejar)
        real_close = session.close
        close_calls = []

        def recording_close(*args, **kwargs):
            # Record the call, then defer to the real implementation
            close_calls.append(True)
            return real_close(*args, **kwargs)

        monkeypatch.setattr(session, 'close', recording_close)
        rh.close()
        assert close_calls
932
933
def run_validation(handler, error, req, **handler_kwargs):
    """Validate *req* with a handler built from *handler_kwargs*.

    When *error* is truthy it is the exception type that ``validate()`` must
    raise; otherwise validation is expected to pass without raising.
    """
    with handler(**handler_kwargs) as rh:
        if not error:
            rh.validate(req)
            return
        with pytest.raises(error):
            rh.validate(req)
941
942
class TestRequestHandlerValidation:
    """Checks ``RequestHandler.validate()`` for URL schemes, proxies and extensions.

    Each ``*_TESTS`` table is keyed by handler (a fixture key string or a
    handler class) and lists the cases for that handler. In every case the
    expected outcome is either an exception type that validation must raise,
    or ``False`` when validation must pass.
    """

    class ValidationRH(RequestHandler):
        """Minimal handler that overrides nothing but ``_send``, which always fails."""

        def _send(self, request):
            raise RequestError('test')

    class NoCheckRH(ValidationRH):
        """Handler with every ``_SUPPORTED_*`` attribute set to ``None``.

        The cases below expect nothing to be rejected for this handler.
        """
        _SUPPORTED_FEATURES = None
        _SUPPORTED_PROXY_SCHEMES = None
        _SUPPORTED_URL_SCHEMES = None

        def _check_extensions(self, extensions):
            # Empty the mapping so that no extension is left over as unhandled
            extensions.clear()

    class HTTPSupportedRH(ValidationRH):
        """Handler that only declares the ``http`` URL scheme as supported."""
        _SUPPORTED_URL_SCHEMES = ('http',)

    URL_SCHEME_TESTS = [
        # handler, [(url scheme, expected exception or False, handler kwargs)]
        ('Urllib', [
            ('http', False, {}),
            ('https', False, {}),
            ('data', False, {}),
            ('ftp', False, {}),
            ('file', UnsupportedRequest, {}),
            ('file', False, {'enable_file_urls': True}),
        ]),
        ('Requests', [
            ('http', False, {}),
            ('https', False, {}),
        ]),
        ('Websockets', [
            ('ws', False, {}),
            ('wss', False, {}),
        ]),
        (NoCheckRH, [('http', False, {})]),
        (ValidationRH, [('http', UnsupportedRequest, {})])
    ]

    PROXY_SCHEME_TESTS = [
        # handler, request scheme, [(proxy scheme, expected exception or False)]
        ('Urllib', 'http', [
            ('http', False),
            ('https', UnsupportedRequest),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
            ('socks', UnsupportedRequest),
        ]),
        ('Requests', 'http', [
            ('http', False),
            ('https', False),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
        ]),
        ('Websockets', 'ws', [('http', UnsupportedRequest)]),
        (NoCheckRH, 'http', [('http', False)]),
        (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
    ]

    PROXY_KEY_TESTS = [
        # handler, [(proxy key, expected exception or False)]
        ('Urllib', [
            ('all', False),
            ('unrelated', False),
        ]),
        ('Requests', [
            ('all', False),
            ('unrelated', False),
        ]),
        (NoCheckRH, [('all', False)]),
        (HTTPSupportedRH, [('all', UnsupportedRequest)]),
        (HTTPSupportedRH, [('no', UnsupportedRequest)]),
    ]

    EXTENSION_TESTS = [
        # handler, request scheme, [(extensions, expected exception or False)]
        ('Urllib', 'http', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'cookiejar': CookieJar()}, AssertionError),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        ('Requests', 'http', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        (NoCheckRH, 'http', [
            ({'cookiejar': 'notacookiejar'}, False),
            ({'somerandom': 'test'}, False),  # but any extension is allowed through
        ]),
        ('Websockets', 'ws', [
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'timeout': 2}, False),
        ]),
    ]

    @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
        (handler_tests[0], scheme, fail, handler_kwargs)
        for handler_tests in URL_SCHEME_TESTS
        for scheme, fail, handler_kwargs in handler_tests[1]
    ], indirect=['handler'])
    def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
        """Validate a bare ``<scheme>://`` URL against the expected outcome."""
        run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))

    @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False)], indirect=['handler'])
    def test_no_proxy(self, handler, fail):
        """The ``no`` proxy key is checked both on the request and on the handler."""
        run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
        run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})

    @pytest.mark.parametrize('handler,proxy_key,fail', [
        (handler_tests[0], proxy_key, fail)
        for handler_tests in PROXY_KEY_TESTS
        for proxy_key, fail in handler_tests[1]
    ], indirect=['handler'])
    def test_proxy_key(self, handler, proxy_key, fail):
        """Proxy keys are checked both on the request and on the handler."""
        run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
        run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})

    @pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
        (handler_tests[0], handler_tests[1], scheme, fail)
        for handler_tests in PROXY_SCHEME_TESTS
        for scheme, fail in handler_tests[2]
    ], indirect=['handler'])
    def test_proxy_scheme(self, handler, req_scheme, scheme, fail):
        """Proxy URL schemes are checked both on the request and on the handler."""
        run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
        run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})

    @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests'], indirect=True)
    def test_empty_proxy(self, handler):
        """A proxy value of ``None`` must pass validation."""
        run_validation(handler, False, Request('http://', proxies={'http': None}))
        run_validation(handler, False, Request('http://'), proxies={'http': None})

    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_invalid_proxy_url(self, handler, proxy_url):
        """Proxy URLs without a scheme are rejected as unsupported."""
        run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))

    @pytest.mark.parametrize('handler,scheme,extensions,fail', [
        (handler_tests[0], handler_tests[1], extensions, fail)
        for handler_tests in EXTENSION_TESTS
        for extensions, fail in handler_tests[2]
    ], indirect=['handler'])
    def test_extension(self, handler, scheme, extensions, fail):
        """Request extensions are validated against the expected outcome."""
        run_validation(
            handler, fail, Request(f'{scheme}://', extensions=extensions))

    def test_invalid_request_type(self):
        """Both ``validate`` and ``send`` reject anything that is not a Request."""
        rh = self.ValidationRH(logger=FakeLogger())
        for method in (rh.validate, rh.send):
            with pytest.raises(TypeError, match='Expected an instance of Request'):
                method('not a request')
1105
1106
class FakeResponse(Response):
    """Response with an empty body that keeps a reference to its request."""

    def __init__(self, request):
        # XXX: we could make request part of standard response interface
        self.request = request
        empty_body = io.BytesIO(b'')
        super().__init__(fp=empty_body, headers={}, url=request.url)
1112
1113
class FakeRH(RequestHandler):
    """Handler that accepts every request and answers with a FakeResponse.

    URLs starting with ``ssl://`` raise an SSLError instead, with the rest of
    the URL as its message.
    """

    def _validate(self, request):
        # Nothing is checked: every request is accepted
        return

    def _send(self, request: Request):
        ssl_prefix = 'ssl://'
        if not request.url.startswith(ssl_prefix):
            return FakeResponse(request)
        raise SSLError(request.url[len(ssl_prefix):])
1123
1124
class FakeRHYDL(FakeYDL):
    """FakeYDL whose request director is built from FakeRH alone."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        handler_classes = [FakeRH]
        self._request_director = self.build_request_director(handler_classes)
1129
1130
class AllUnsupportedRHYDL(FakeYDL):
    """FakeYDL whose only request handler declares no supported features, proxy schemes or URL schemes."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        class UnsupportedRH(RequestHandler):
            # Empty tuples: nothing is declared as supported
            _SUPPORTED_FEATURES = ()
            _SUPPORTED_PROXY_SCHEMES = ()
            _SUPPORTED_URL_SCHEMES = ()

            def _send(self, request: Request):
                pass

        self._request_director = self.build_request_director([UnsupportedRH])
1145
1146
class TestRequestDirector:
    """Tests for handler registration, selection and dispatch in RequestDirector."""

    def test_handler_operations(self):
        """Handlers are stored by ``RH_KEY``; adding the same key again replaces the entry."""
        director = RequestDirector(logger=FakeLogger())
        handler = FakeRH(logger=FakeLogger())
        director.add_handler(handler)
        assert director.handlers.get(FakeRH.RH_KEY) is handler

        # Handler should overwrite
        handler2 = FakeRH(logger=FakeLogger())
        director.add_handler(handler2)
        assert director.handlers.get(FakeRH.RH_KEY) is not handler
        assert director.handlers.get(FakeRH.RH_KEY) is handler2
        assert len(director.handlers) == 1

        class AnotherFakeRH(FakeRH):
            pass
        director.add_handler(AnotherFakeRH(logger=FakeLogger()))
        assert len(director.handlers) == 2
        assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY

        director.handlers.pop(FakeRH.RH_KEY, None)
        assert director.handlers.get(FakeRH.RH_KEY) is None
        assert len(director.handlers) == 1

        # RequestErrors should passthrough
        with pytest.raises(SSLError):
            director.send(Request('ssl://something'))

    def test_send(self):
        """Sending fails with RequestError without handlers and works once one is added."""
        director = RequestDirector(logger=FakeLogger())
        with pytest.raises(RequestError):
            director.send(Request('any://'))
        director.add_handler(FakeRH(logger=FakeLogger()))
        assert isinstance(director.send(Request('http://')), FakeResponse)

    def test_unsupported_handlers(self):
        """Handlers that do not support a request are skipped in favour of ones that do."""
        class SupportedRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        director = RequestDirector(logger=FakeLogger())
        director.add_handler(SupportedRH(logger=FakeLogger()))
        director.add_handler(FakeRH(logger=FakeLogger()))

        # First should take preference
        assert director.send(Request('http://')).read() == b'supported'
        assert director.send(Request('any://')).read() == b''

        director.handlers.pop(FakeRH.RH_KEY)
        with pytest.raises(NoSupportingHandlers):
            director.send(Request('any://'))

    def test_unexpected_error(self):
        """A non-RequestError raised by a handler is counted, not propagated."""
        director = RequestDirector(logger=FakeLogger())

        class UnexpectedRH(FakeRH):
            def _send(self, request: Request):
                raise TypeError('something')

        # Pass a logger instance (not the class), like every other handler here
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
            director.send(Request('any://'))

        director.handlers.clear()
        assert len(director.handlers) == 0

        # Should not be fatal
        director.add_handler(FakeRH(logger=FakeLogger()))
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        assert director.send(Request('any://'))

    def test_preference(self):
        """A registered preference function decides which handler answers a request."""
        director = RequestDirector(logger=FakeLogger())
        director.add_handler(FakeRH(logger=FakeLogger()))

        class SomeRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        def some_preference(rh, request):
            # Neutral for other handlers; SomeRH scores high only when the
            # request carries a 'prefer' header, otherwise below neutral.
            if not isinstance(rh, SomeRH):
                return 0
            return 100 if 'prefer' in request.headers else -1

        director.add_handler(SomeRH(logger=FakeLogger()))
        director.preferences.add(some_preference)

        assert director.send(Request('http://')).read() == b''
        assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'

    def test_close(self, monkeypatch):
        """Closing the director closes its registered handlers."""
        director = RequestDirector(logger=FakeLogger())
        director.add_handler(FakeRH(logger=FakeLogger()))
        called = False

        def mock_close(*args, **kwargs):
            nonlocal called
            called = True

        monkeypatch.setattr(director.handlers[FakeRH.RH_KEY], 'close', mock_close)
        director.close()
        assert called
1254
1255
1256 # XXX: do we want to move this to test_YoutubeDL.py?
class TestYoutubeDLNetworking:
    """Tests for how the YoutubeDL object builds request handlers and prepares requests."""

    @staticmethod
    def build_handler(ydl, handler: RequestHandler = FakeRH):
        """Build a director containing only *handler* and return the resulting handler instance."""
        return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)

    @staticmethod
    def _restore_env(key, old_value):
        """Reset environment variable *key* to *old_value*; ``None`` means it was unset."""
        if old_value is None:
            os.environ.pop(key, None)
        else:
            os.environ[key] = old_value

    def test_compat_opener(self):
        """The legacy ``_opener`` attribute still yields an OpenerDirector."""
        with FakeYDL() as ydl:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                assert isinstance(ydl._opener, urllib.request.OpenerDirector)

    @pytest.mark.parametrize('proxy,expected', [
        ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
        ('', {'all': '__noproxy__'}),
        (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'})  # env, set https
    ])
    def test_proxy(self, proxy, expected):
        """The ``proxy`` param overrides ``HTTP_PROXY``; the env is used only when it is ``None``."""
        old_http_proxy = os.environ.get('HTTP_PROXY')
        try:
            os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081'  # ensure that provided proxies override env
            with FakeYDL({'proxy': proxy}) as ydl:
                assert ydl.proxies == expected
        finally:
            # Also remove the variable if it was unset before, so that the
            # test value does not leak into later tests.
            self._restore_env('HTTP_PROXY', old_http_proxy)

    def test_compat_request(self):
        """``urlopen`` accepts URL strings and legacy ``urllib.request.Request`` objects."""
        with FakeRHYDL() as ydl:
            assert ydl.urlopen('test://')
            urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
            urllib_req.add_unredirected_header('Cookie', 'bob=bob')
            urllib_req.timeout = 2
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                req = ydl.urlopen(urllib_req).request
                assert req.url == urllib_req.get_full_url()
                assert req.data == urllib_req.data
                assert req.method == urllib_req.get_method()
                assert 'X-Test' in req.headers
                assert 'Cookie' in req.headers
                assert req.extensions.get('timeout') == 2

            with pytest.raises(AssertionError):
                ydl.urlopen(None)

    def test_extract_basic_auth(self):
        """Credentials in the URL become a Basic ``Authorization`` header."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('http://user:pass@foo.bar'))
            assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'

    def test_sanitize_url(self):
        """The mistyped ``httpss`` scheme is corrected to ``https``."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('httpss://foo.bar'))
            assert res.request.url == 'https://foo.bar'

    def test_file_urls_error(self):
        """file:// URLs fail with an explanatory message by default."""
        # use urllib handler
        with FakeYDL() as ydl:
            with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
                ydl.urlopen('file://')

    @pytest.mark.parametrize('scheme', (['ws', 'wss']))
    def test_websocket_unavailable_error(self, scheme):
        """WebSocket URLs without a supporting handler fail with an explanatory message."""
        with AllUnsupportedRHYDL() as ydl:
            with pytest.raises(RequestError, match=r'This request requires WebSocket support'):
                ydl.urlopen(f'{scheme}://')

    def test_legacy_server_connect_error(self):
        """Specific SSL errors get a ``--legacy-server-connect`` hint; others pass through."""
        with FakeRHYDL() as ydl:
            for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
                    ydl.urlopen(f'ssl://{error}')

            with pytest.raises(SSLError, match='testerror'):
                ydl.urlopen('ssl://testerror')

    @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
        ('http', '__noproxy__', None),
        ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
        ('https', 'example.com', 'http://example.com'),
        ('https', '//example.com', 'http://example.com'),
        ('https', 'socks5://example.com', 'socks5h://example.com'),
        ('http', 'socks://example.com', 'socks4://example.com'),
        ('http', 'socks4://example.com', 'socks4://example.com'),
        ('unrelated', '/bad/proxy', '/bad/proxy'),  # clean_proxies should ignore bad proxies
    ])
    def test_clean_proxy(self, proxy_key, proxy_url, expected):
        """Proxy values are normalised both in ``urlopen()`` and when building a handler."""
        # proxies should be cleaned in urlopen()
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
            assert req.proxies[proxy_key] == expected

        # and should also be cleaned when building the handler
        env_key = f'{proxy_key.upper()}_PROXY'
        old_env_proxy = os.environ.get(env_key)
        try:
            os.environ[env_key] = proxy_url  # ensure that provided proxies override env
            with FakeYDL() as ydl:
                rh = self.build_handler(ydl)
                assert rh.proxies[proxy_key] == expected
        finally:
            # Also remove the variable if it was unset before, so that the
            # test value does not leak into later tests.
            self._restore_env(env_key, old_env_proxy)

    def test_clean_proxy_header(self):
        """The ``ytdl-request-proxy`` header is removed and turned into an ``all`` proxy."""
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
            assert 'ytdl-request-proxy' not in req.headers
            assert req.proxies == {'all': 'http://foo.bar'}

        with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'ytdl-request-proxy' not in rh.headers
            assert rh.proxies == {'all': 'http://foo.bar'}

    def test_clean_header(self):
        """Internal control headers are removed before a request or handler is used."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
            assert 'Youtubedl-no-compression' not in res.request.headers
            assert res.request.headers.get('Accept-Encoding') == 'identity'

        with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'Youtubedl-no-compression' not in rh.headers
            assert rh.headers.get('Accept-Encoding') == 'identity'

        with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'Ytdl-socks-proxy' not in rh.headers

    def test_build_handler_params(self):
        """YoutubeDL params are passed on to the matching handler attributes."""
        with FakeYDL({
            'http_headers': {'test': 'testtest'},
            'socket_timeout': 2,
            'proxy': 'http://127.0.0.1:8080',
            'source_address': '127.0.0.45',
            'debug_printtraffic': True,
            'compat_opts': ['no-certifi'],
            'nocheckcertificate': True,
            'legacyserverconnect': True,
        }) as ydl:
            rh = self.build_handler(ydl)
            assert rh.headers.get('test') == 'testtest'
            assert 'Accept' in rh.headers  # ensure std_headers are still there
            assert rh.timeout == 2
            assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
            assert rh.source_address == '127.0.0.45'
            assert rh.verbose is True
            assert rh.prefer_system_certs is True
            assert rh.verify is False
            assert rh.legacy_ssl_support is True

    @pytest.mark.parametrize('ydl_params', [
        {'client_certificate': 'fakecert.crt'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
        {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
    ])
    def test_client_certificate(self, ydl_params):
        """Client certificate params end up unchanged on the handler."""
        with FakeYDL(ydl_params) as ydl:
            rh = self.build_handler(ydl)
            assert rh._client_cert == ydl_params  # XXX: Too bound to implementation

    def test_urllib_file_urls(self):
        """The ``enable_file_urls`` param is passed on to the urllib handler."""
        with FakeYDL({'enable_file_urls': False}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is False

        with FakeYDL({'enable_file_urls': True}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is True

    def test_compat_opt_prefer_urllib(self):
        """The ``prefer-legacy-http-handler`` compat option registers a preference for urllib."""
        # This assumes urllib only has a preference when this compat opt is given
        with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
            director = ydl.build_request_director([UrllibRH])
            assert len(director.preferences) == 1
            assert director.preferences.pop()(UrllibRH, None)
1436
1437
class TestRequest:
    """Tests for the attributes and helpers of the Request object."""

    def test_query(self):
        """Query parameters are merged into the URL, replacing existing keys."""
        request = Request('http://example.com?q=something', query={'v': 'xyz'})
        assert request.url == 'http://example.com?q=something&v=xyz'

        request.update(query={'v': '123'})
        assert request.url == 'http://example.com?q=something&v=123'
        request.update(url='http://example.com', query={'v': 'xyz'})
        assert request.url == 'http://example.com?v=xyz'

    def test_method(self):
        """The method follows the presence of data until it is set explicitly."""
        request = Request('http://example.com')
        assert request.method == 'GET'
        request.data = b'test'
        assert request.method == 'POST'
        request.data = None
        assert request.method == 'GET'

        # Once set explicitly, the method no longer depends on the data
        request.data = b'test2'
        request.method = 'PUT'
        assert request.method == 'PUT'
        request.data = None
        assert request.method == 'PUT'

        with pytest.raises(TypeError):
            request.method = 1

    def test_request_helpers(self):
        """The helper request classes preset their HTTP method."""
        head_request = HEADRequest('http://example.com')
        put_request = PUTRequest('http://example.com')
        assert head_request.method == 'HEAD'
        assert put_request.method == 'PUT'

    def test_headers(self):
        """Headers are kept in an HTTPHeaderDict, whatever mapping is assigned."""
        request = Request('http://example.com', headers={'tesT': 'test'})
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        request.update(headers={'teSt2': 'test2'})
        assert request.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})

        replacement = HTTPHeaderDict({'test': 'test'})
        request.headers = replacement
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        assert request.headers is replacement

        # test converts dict to case insensitive dict
        plain_dict = {'test2': 'test2'}
        request.headers = plain_dict
        assert isinstance(request.headers, HTTPHeaderDict)
        assert request.headers is not plain_dict

        with pytest.raises(TypeError):
            request.headers = None

    def test_data_type(self):
        """Only bytes, iterables of bytes and file-like objects are accepted as data."""
        request = Request('http://example.com')
        assert request.data is None

        # test bytes is allowed
        request.data = b'test'
        assert request.data == b'test'

        # test iterable of bytes is allowed
        chunks = [b'test', b'test2']
        request.data = chunks
        assert request.data == chunks

        # test file-like object is allowed
        stream = io.BytesIO(b'test')
        request.data = stream
        assert request.data == stream

        # common mistake: test str not allowed
        with pytest.raises(TypeError):
            request.data = 'test'
        assert request.data != 'test'

        # common mistake: test dict is not allowed
        with pytest.raises(TypeError):
            request.data = {'test': 'test'}
        assert request.data != {'test': 'test'}

    def test_content_length_header(self):
        """Content-Length is dropped when data changes or is absent."""
        with_body = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
        assert with_body.headers.get('Content-Length') == '0'

        with_body.data = b'test'
        assert 'Content-Length' not in with_body.headers

        without_body = Request('http://example.com', headers={'Content-Length': '10'})
        assert 'Content-Length' not in without_body.headers

    def test_content_type_header(self):
        """Content-Type survives data changes, is dropped without data and defaulted afterwards."""
        request = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
        assert request.headers.get('Content-Type') == 'test'

        request.data = b'test2'
        assert request.headers.get('Content-Type') == 'test'

        request.data = None
        assert 'Content-Type' not in request.headers

        request.data = b'test3'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_update_req(self):
        """``update(data=b'')`` turns a GET request into a POST with the default Content-Type."""
        request = Request('http://example.com')
        assert request.data is None
        assert request.method == 'GET'
        assert 'Content-Type' not in request.headers

        # Test that zero-byte payloads will be sent
        request.update(data=b'')
        assert request.data == b''
        assert request.method == 'POST'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_proxies(self):
        """Proxies given to the constructor are exposed unchanged."""
        request = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'})
        assert request.proxies == {'http': 'http://127.0.0.1:8080'}

    def test_extensions(self):
        """Extensions given to the constructor are exposed unchanged."""
        request = Request(url='http://example.com', extensions={'timeout': 2})
        assert request.extensions == {'timeout': 2}

    def test_copy(self):
        """``copy()`` duplicates the containers but shares the data and extension values."""
        original = Request(
            url='http://example.com',
            extensions={'cookiejar': CookieJar()},
            headers={'Accept-Encoding': 'br'},
            proxies={'http': 'http://127.0.0.1'},
            data=[b'123']
        )
        duplicate = original.copy()
        assert duplicate is not original
        assert duplicate.url == original.url
        assert duplicate.headers == original.headers
        assert duplicate.headers is not original.headers
        assert duplicate.proxies == original.proxies
        assert duplicate.proxies is not original.proxies

        # Data is not able to be copied
        assert duplicate.data == original.data
        assert duplicate.data is original.data

        # Shallow copy extensions
        assert duplicate.extensions is not original.extensions
        assert duplicate.extensions['cookiejar'] == original.extensions['cookiejar']

        # Subclasses are copied by default
        class AnotherRequest(Request):
            pass

        subclassed = AnotherRequest(url='http://127.0.0.1')
        assert isinstance(subclassed.copy(), AnotherRequest)

    def test_url(self):
        """URLs are escaped and normalised; a non-string URL is rejected."""
        request = Request(url='https://фtest.example.com/ some spaceв?ä=c',)
        assert request.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'

        assert Request(url='//example.com').url == 'http://example.com'

        with pytest.raises(TypeError):
            Request(url='https://').url = None
1590
1591
class TestResponse:
    """Tests for the Response wrapper object."""

    @pytest.mark.parametrize('reason,status,expected', [
        ('custom', 200, 'custom'),
        (None, 404, 'Not Found'),  # fallback status
        ('', 403, 'Forbidden'),
        (None, 999, None)
    ])
    def test_reason(self, reason, status, expected):
        """A missing or empty reason falls back to the phrase for the status code, if any."""
        response = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
        assert response.reason == expected

    def test_headers(self):
        """Repeated headers are all kept and header lookup is case-insensitive."""
        raw_headers = Message()
        for name, value in (
            ('Test', 'test'),
            ('Test', 'test2'),
            ('content-encoding', 'br'),
        ):
            raw_headers.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=raw_headers, url='test://')
        assert response.headers.get_all('test') == ['test', 'test2']
        assert 'Content-Encoding' in response.headers

    def test_get_header(self):
        """``get_header`` joins repeated values, except Set-Cookie where the first one is returned."""
        raw_headers = Message()
        for name, value in (
            ('Set-Cookie', 'cookie1'),
            ('Set-cookie', 'cookie2'),
            ('Test', 'test'),
            ('Test', 'test2'),
        ):
            raw_headers.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=raw_headers, url='test://')
        assert response.get_header('test') == 'test, test2'
        assert response.get_header('set-Cookie') == 'cookie1'
        assert response.get_header('notexist', 'default') == 'default'

    def test_compat(self):
        """The deprecated urllib-style accessors mirror the current attributes."""
        response = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
        with warnings.catch_warnings():
            # The legacy accessors emit DeprecationWarning; silence it here
            warnings.simplefilter('ignore', category=DeprecationWarning)
            assert response.code == response.getcode() == response.status
            assert response.geturl() == response.url
            assert response.info() is response.headers
            assert response.getheader('test') == response.get_header('test')