]> jfr.im git - yt-dlp.git/blob - test/test_networking.py
[rh:requests] Apply `remove_dot_segments` to absolute redirect locations
[yt-dlp.git] / test / test_networking.py
1 #!/usr/bin/env python3
2
3 # Allow direct execution
4 import os
5 import sys
6
7 import pytest
8
9 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
11 import gzip
12 import http.client
13 import http.cookiejar
14 import http.server
15 import io
16 import pathlib
17 import random
18 import ssl
19 import tempfile
20 import threading
21 import time
22 import urllib.error
23 import urllib.request
24 import warnings
25 import zlib
26 from email.message import Message
27 from http.cookiejar import CookieJar
28
29 from test.helper import FakeYDL, http_server_port, verify_address_availability
30 from yt_dlp.cookies import YoutubeDLCookieJar
31 from yt_dlp.dependencies import brotli, requests, urllib3
32 from yt_dlp.networking import (
33 HEADRequest,
34 PUTRequest,
35 Request,
36 RequestDirector,
37 RequestHandler,
38 Response,
39 )
40 from yt_dlp.networking._urllib import UrllibRH
41 from yt_dlp.networking.exceptions import (
42 CertificateVerifyError,
43 HTTPError,
44 IncompleteRead,
45 NoSupportingHandlers,
46 ProxyError,
47 RequestError,
48 SSLError,
49 TransportError,
50 UnsupportedRequest,
51 )
52 from yt_dlp.utils._utils import _YDLLogger as FakeLogger
53 from yt_dlp.utils.networking import HTTPHeaderDict
54
55 from test.conftest import validate_and_send
56
57 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
58
59
def _build_proxy_handler(name):
    """Build a request handler class for a fake HTTP proxy server.

    The returned handler answers every GET request with the plain-text body
    ``'<name>: <request path>'``, which lets a test identify the proxy that
    served a request.

    @param name: label stored as ``proxy_name`` and echoed in every response
    @returns: a ``http.server.BaseHTTPRequestHandler`` subclass
    """
    class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
        proxy_name = name

        def log_message(self, format, *args):
            # Silence the default per-request logging
            pass

        def do_GET(self):
            self.send_response(200)
            self.send_header('Content-Type', 'text/plain; charset=utf-8')
            self.end_headers()
            self.wfile.write(f'{self.proxy_name}: {self.path}'.encode())

    return HTTPTestRequestHandler
73
74
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    """Request handler for the local HTTP(S) servers used by the tests below.

    The request path selects a canned behaviour: serving small fixed payloads,
    redirecting, echoing the request headers or method, compressing the body,
    sending a truncated body, delaying the response, and so on.
    Unknown paths get a 404 response.
    """
    protocol_version = 'HTTP/1.1'

    _HTML_CONTENT_TYPE = 'text/html; charset=utf-8'
    _VIDEO_PAGE = b'<html><video src="/vid.mp4" /></html>'
    # Paths which all serve the same small HTML page
    _VIDEO_PAGE_PATHS = ('/video.html', '/%E4%B8%AD%E6%96%87.html', '/%c7%9f')

    def log_message(self, format, *args):
        # Silence the default per-request logging
        pass

    def _send_payload(self, status, payload, content_type):
        """Send a complete response: status line, type/length headers and body"""
        self.send_response(status)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _send_redirect(self, status, location):
        """Send a body-less response carrying a Location header"""
        self.send_response(status)
        self.send_header('Location', location)
        self.send_header('Content-Length', '0')
        self.end_headers()

    @staticmethod
    def _gzip(data):
        """Return `data` compressed as a gzip stream"""
        buf = io.BytesIO()
        with gzip.GzipFile(fileobj=buf, mode='wb') as f:
            f.write(data)
        return buf.getvalue()

    def _headers(self):
        """Echo the request headers back as the response body"""
        self._send_payload(200, str(self.headers).encode(), 'application/json')

    def _redirect(self):
        """Redirect to /method using the status code given in /redirect_<code>"""
        self._send_redirect(int(self.path[len('/redirect_'):]), '/method')

    def _method(self, method, payload=None):
        """Report the handled request method in a 'Method' response header"""
        self.send_response(200)
        self.send_header('Content-Length', str(len(payload or '')))
        self.send_header('Method', method)
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def _status(self, status):
        """Send a small HTML page with the given status code"""
        payload = f'<html>{status} NOT FOUND</html>'.encode()
        self._send_payload(int(status), payload, self._HTML_CONTENT_TYPE)

    def _read_data(self):
        """Read the request body; returns b'' if no Content-Length was sent"""
        if 'Content-Length' in self.headers:
            return self.rfile.read(int(self.headers['Content-Length']))
        # Callers concatenate the result with bytes, so never return None here
        return b''

    def do_POST(self):
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('POST', data)
        elif self.path.startswith('/headers'):
            self._headers()
        else:
            self._status(404)

    def do_HEAD(self):
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('HEAD')
        else:
            self._status(404)

    def do_PUT(self):
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('PUT', data)
        else:
            self._status(404)

    def do_GET(self):
        # NB: the order of the branches matters,
        # the specific '/redirect_*' paths must be checked before the generic prefix
        if self.path in self._VIDEO_PAGE_PATHS:
            self._send_payload(200, self._VIDEO_PAGE, self._HTML_CONTENT_TYPE)
        elif self.path == '/vid.mp4':
            self._send_payload(200, b'\x00\x00\x00\x00\x20\x66\x74[video]', 'video/mp4')
        elif self.path.startswith('/redirect_loop'):
            self._send_redirect(301, self.path)
        elif self.path == '/redirect_dotsegments':
            # redirect to /headers but with dot segments before
            self._send_redirect(301, '/a/b/./../../headers')
        elif self.path == '/redirect_dotsegments_absolute':
            # redirect to /headers but with dot segments before - absolute url
            self._send_redirect(
                301, f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')
        elif self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('GET', str(self.headers).encode())
        elif self.path.startswith('/headers'):
            self._headers()
        elif self.path.startswith('/308-to-headers'):
            self._send_redirect(308, '/headers')
        elif self.path == '/trailing_garbage':
            # A valid gzip stream followed by bytes that are not part of it
            compressed = self._gzip(self._VIDEO_PAGE) + b'trailing garbage'
            self.send_response(200)
            self.send_header('Content-Type', self._HTML_CONTENT_TYPE)
            self.send_header('Content-Encoding', 'gzip')
            self.send_header('Content-Length', str(len(compressed)))
            self.end_headers()
            self.wfile.write(compressed)
        elif self.path == '/302-non-ascii-redirect':
            # The Location header is sent with raw (not percent-encoded) non-ASCII characters
            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
            self._send_redirect(301, new_url)
        elif self.path == '/content-encoding':
            # The requested encodings are applied in the order given in the 'ytdl-encoding' header
            encodings = self.headers.get('ytdl-encoding', '')
            payload = self._VIDEO_PAGE
            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
                if encoding == 'br' and brotli:
                    payload = brotli.compress(payload)
                elif encoding == 'gzip':
                    payload = self._gzip(payload)
                elif encoding == 'deflate':
                    payload = zlib.compress(payload)
                elif encoding == 'unsupported':
                    payload = b'raw'
                    break
                else:
                    self._status(415)
                    return
            self.send_response(200)
            self.send_header('Content-Encoding', encodings)
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)
        elif self.path.startswith('/gen_'):
            # Respond with the status code given in /gen_<code>
            self._send_payload(
                int(self.path[len('/gen_'):]), b'<html></html>', self._HTML_CONTENT_TYPE)
        elif self.path.startswith('/incompleteread'):
            # Announce far more data than is actually sent, then end the response
            self.send_response(200)
            self.send_header('Content-Type', self._HTML_CONTENT_TYPE)
            self.send_header('Content-Length', '234234')
            self.end_headers()
            self.wfile.write(b'<html></html>')
            self.finish()
        elif self.path.startswith('/timeout_'):
            # Wait the number of seconds given in /timeout_<seconds> before responding
            time.sleep(int(self.path[len('/timeout_'):]))
            self._headers()
        elif self.path == '/source_address':
            # Report the address the client connected from
            self._send_payload(
                200, str(self.client_address[0]).encode(), self._HTML_CONTENT_TYPE)
            self.finish()
        else:
            self._status(404)

    def send_header(self, keyword, value):
        """
        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
        This is against what is defined in RFC 3986, however we need to test we support this
        since some sites incorrectly do this.
        """
        if keyword.lower() == 'connection':
            return super().send_header(keyword, value)

        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []

        # Encoded as UTF-8 here, bypassing the encoding done by the base class implementation
        self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
284
285
class TestRequestHandlerBase:
    """Base class for test classes needing a local HTTP and HTTPS server.

    `setup_class` starts both servers on an ephemeral port of 127.0.0.1 and
    stores the ports as `http_port` and `https_port`.
    `teardown_class` stops the servers again.
    """

    @classmethod
    def setup_class(cls):
        cls.http_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        cls.http_port = http_server_port(cls.http_httpd)
        cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever)
        # FIXME: we should probably stop the http server thread after each test
        # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
        cls.http_server_thread.daemon = True
        cls.http_server_thread.start()

        # HTTPS server
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        cls.https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
        cls.https_port = http_server_port(cls.https_httpd)
        cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever)
        cls.https_server_thread.daemon = True
        cls.https_server_thread.start()

    @classmethod
    def teardown_class(cls):
        # Stop the serve_forever() loops and release the listening sockets,
        # so the servers do not pile up over the test session.
        # getattr: a subclass may replace setup_class without creating both servers
        for attr in ('http_httpd', 'https_httpd'):
            server = getattr(cls, attr, None)
            if server is not None:
                server.shutdown()
                server.server_close()
309
310
class TestHTTPRequestHandler(TestRequestHandlerBase):
    """HTTP(S) behaviour tests, run against each of the parametrized request handlers.

    `handler` is an indirect fixture; the tests call it to create the request handler under test.
    """

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_verify_cert(self, handler):
        with handler() as rh:
            with pytest.raises(CertificateVerifyError):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

        with handler(verify=False) as rh:
            r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
            assert r.status == 200
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_ssl_error(self, handler):
        # HTTPS server with too old TLS version
        # XXX: is there a better way to test this than to create a new server?
        # NOTE(review): no certificate is loaded into this SSL context
        https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
        https_port = http_server_port(https_httpd)
        https_server_thread = threading.Thread(target=https_httpd.serve_forever)
        https_server_thread.daemon = True
        https_server_thread.start()

        try:
            with handler(verify=False) as rh:
                with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
                    validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
                assert not issubclass(exc_info.type, CertificateVerifyError)
        finally:
            # This server is only needed by this test, so do not leave it running
            https_httpd.shutdown()
            https_httpd.server_close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_percent_encode(self, handler):
        with handler() as rh:
            # Unicode characters should be encoded with uppercase percent-encoding
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
            assert res.status == 200
            res.close()
            # don't normalize existing percent encodings
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
            assert res.status == 200
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    @pytest.mark.parametrize('path', [
        '/a/b/./../../headers',
        '/redirect_dotsegments',
        # https://github.com/yt-dlp/yt-dlp/issues/9020
        '/redirect_dotsegments_absolute',
    ])
    def test_remove_dot_segments(self, handler, path):
        with handler(verbose=True) as rh:
            # This isn't a comprehensive test,
            # but it should be enough to check whether the handler is removing dot segments in required scenarios
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
            assert res.status == 200
            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_unicode_path_redirection(self, handler):
        with handler() as rh:
            r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
            assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_raise_http_error(self, handler):
        with handler() as rh:
            for bad_status in (400, 500, 599, 302):
                with pytest.raises(HTTPError):
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_{bad_status}'))

            # Should not raise an error
            validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')).close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_response_url(self, handler):
        with handler() as rh:
            # Response url should be that of the last url in redirect chain
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
            assert res.url == f'http://127.0.0.1:{self.http_port}/method'
            res.close()
            res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
            assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
            res2.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_redirect(self, handler):
        with handler() as rh:
            def do_req(redirect_status, method, assert_no_content=False):
                """Request /redirect_<status>; returns (request data received back, final method)"""
                data = b'testdata' if method in ('POST', 'PUT') else None
                res = validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))

                # The /method endpoint responds with the request data (if any) followed by the request headers.
                # If the response does not start with the data, everything read is part of the headers.
                headers = b''
                data_sent = b''
                if data is not None:
                    data_sent += res.read(len(data))
                    if data_sent != data:
                        headers += data_sent
                        data_sent = b''

                headers += res.read()

                if assert_no_content or data is None:
                    assert b'Content-Type' not in headers
                    assert b'Content-Length' not in headers
                else:
                    assert b'Content-Type' in headers
                    assert b'Content-Length' in headers

                return data_sent.decode(), res.headers.get('method', '')

            # A 303 must either use GET or HEAD for subsequent request
            assert do_req(303, 'POST', True) == ('', 'GET')
            assert do_req(303, 'HEAD') == ('', 'HEAD')

            assert do_req(303, 'PUT', True) == ('', 'GET')

            # 301 and 302 turn POST only into a GET
            assert do_req(301, 'POST', True) == ('', 'GET')
            assert do_req(301, 'HEAD') == ('', 'HEAD')
            assert do_req(302, 'POST', True) == ('', 'GET')
            assert do_req(302, 'HEAD') == ('', 'HEAD')

            assert do_req(301, 'PUT') == ('testdata', 'PUT')
            assert do_req(302, 'PUT') == ('testdata', 'PUT')

            # 307 and 308 should not change method
            for m in ('POST', 'PUT'):
                assert do_req(307, m) == ('testdata', m)
                assert do_req(308, m) == ('testdata', m)

            assert do_req(307, 'HEAD') == ('', 'HEAD')
            assert do_req(308, 'HEAD') == ('', 'HEAD')

            # These should not redirect and instead raise an HTTPError
            for code in (300, 304, 305, 306):
                with pytest.raises(HTTPError):
                    do_req(code, 'GET')

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_request_cookie_header(self, handler):
        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
        with handler() as rh:
            # Specified Cookie header should be used
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' in res

            # Specified Cookie header should be removed on any redirect
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/308-to-headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' not in res

        # Specified Cookie header should override global cookiejar for that request
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp', port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
            path_specified=True, secure=False, expires=None, discard=False, comment=None,
            comment_url=None, rest={}))

        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read()
            assert b'Cookie: test=ytdlp' not in data
            assert b'Cookie: test=test' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_redirect_loop(self, handler):
        with handler() as rh:
            with pytest.raises(HTTPError, match='redirect loop'):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_incompleteread(self, handler):
        with handler(timeout=2) as rh:
            with pytest.raises(IncompleteRead):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_cookies(self, handler):
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
            False, '/headers', True, False, None, False, None, None, {}))

        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Cookie: test=ytdlp' in data

        # Per request
        with handler() as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
            assert b'Cookie: test=ytdlp' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_headers(self, handler):

        with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
            # Global Headers
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Test1: test' in data

            # Per request headers, merged with global
            data = validate_and_send(rh, Request(
                f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read()
            assert b'Test1: test' in data
            assert b'Test2: changed' in data
            assert b'Test2: test2' not in data
            assert b'Test3: test3' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_timeout(self, handler):
        with handler() as rh:
            # Default timeout is 20 seconds, so this should go through
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3'))

        with handler(timeout=0.5) as rh:
            with pytest.raises(TransportError):
                validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))

            # Per request timeout, should override handler timeout
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_source_address(self, handler):
        source_address = f'127.0.0.{random.randint(5, 255)}'
        # on some systems these loopback addresses we need for testing may not be available
        # see: https://github.com/yt-dlp/yt-dlp/issues/8890
        verify_address_availability(source_address)
        with handler(source_address=source_address) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
            assert source_address == data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_gzip_trailing_garbage(self, handler):
        with handler() as rh:
            data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
            assert data == '<html><video src="/vid.mp4" /></html>'

    def _check_content_encoding(self, handler, encoding, expected_body=b'<html><video src="/vid.mp4" /></html>'):
        """Request /content-encoding with the given encoding and check the response header and body"""
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': encoding}))
            assert res.headers.get('Content-Encoding') == encoding
            assert res.read() == expected_body

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
    def test_brotli(self, handler):
        self._check_content_encoding(handler, 'br')

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_deflate(self, handler):
        self._check_content_encoding(handler, 'deflate')

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_gzip(self, handler):
        self._check_content_encoding(handler, 'gzip')

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_multiple_encodings(self, handler):
        # All combinations are requested through the same handler instance
        with handler() as rh:
            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
                res = validate_and_send(
                    rh, Request(
                        f'http://127.0.0.1:{self.http_port}/content-encoding',
                        headers={'ytdl-encoding': pair}))
                assert res.headers.get('Content-Encoding') == pair
                assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_unsupported_encoding(self, handler):
        # The body of an unknown encoding should be returned as-is
        self._check_content_encoding(handler, 'unsupported', expected_body=b'raw')

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_read(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
            assert res.readable()
            assert res.read(1) == b'H'
            assert res.read(3) == b'ost'
622
623
class TestHTTPProxy(TestRequestHandlerBase):
    """Proxy selection tests, using two fake proxy servers labelled 'normal' and 'geo'.

    The fake proxies respond with '<label>: <requested url>',
    so the response body shows which proxy handled the request.
    """

    @classmethod
    def setup_class(cls):
        super().setup_class()
        # HTTP Proxy server
        cls.proxy = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('normal'))
        cls.proxy_port = http_server_port(cls.proxy)
        cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever)
        cls.proxy_thread.daemon = True
        cls.proxy_thread.start()

        # Geo proxy server
        cls.geo_proxy = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('geo'))
        cls.geo_port = http_server_port(cls.geo_proxy)
        cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever)
        cls.geo_proxy_thread.daemon = True
        cls.geo_proxy_thread.start()

    @classmethod
    def teardown_class(cls):
        # Stop the proxy servers started in setup_class and release their sockets
        for server in (cls.proxy, cls.geo_proxy):
            server.shutdown()
            server.server_close()

        # Let the parent class clean up as well, if it defines a teardown
        parent_teardown = getattr(super(), 'teardown_class', None)
        if parent_teardown is not None:
            parent_teardown()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_http_proxy(self, handler):
        http_proxy = f'http://127.0.0.1:{self.proxy_port}'
        geo_proxy = f'http://127.0.0.1:{self.geo_port}'

        # Test global http proxy
        # Test per request http proxy
        # Test per request http proxy disables proxy
        url = 'http://foo.com/bar'

        # Global HTTP proxy
        with handler(proxies={'http': http_proxy}) as rh:
            res = validate_and_send(rh, Request(url)).read().decode()
            assert res == f'normal: {url}'

            # Per request proxy overrides global
            res = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode()
            assert res == f'geo: {url}'

            # and setting to None disables all proxies for that request
            real_url = f'http://127.0.0.1:{self.http_port}/headers'
            res = validate_and_send(
                rh, Request(real_url, proxies={'http': None})).read().decode()
            assert res != f'normal: {real_url}'
            assert 'Accept' in res

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_noproxy(self, handler):
        with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
            # NO_PROXY
            for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
                nop_response = validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})).read().decode(
                    'utf-8')
                # The /headers endpoint of the real server echoes the request headers
                assert 'Accept' in nop_response

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_allproxy(self, handler):
        url = 'http://foo.com/bar'
        with handler() as rh:
            response = validate_and_send(rh, Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})).read().decode(
                'utf-8')
            assert response == f'normal: {url}'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_http_proxy_with_idn(self, handler):
        with handler(proxies={
            'http': f'http://127.0.0.1:{self.proxy_port}',
        }) as rh:
            url = 'http://中文.tw/'
            response = rh.send(Request(url)).read().decode()
            # b'xn--fiq228c' is '中文'.encode('idna')
            assert response == 'normal: http://xn--fiq228c.tw/'
697
698
class TestClientCertificate:
    """Client certificate tests against an HTTPS server that requires a client certificate.

    The server only accepts client certificates that verify against `ca.crt`
    from the `testdata/certificate` directory.
    """

    @classmethod
    def setup_class(cls):
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
        cacertfn = os.path.join(cls.certdir, 'ca.crt')
        cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.verify_mode = ssl.CERT_REQUIRED
        sslctx.load_verify_locations(cafile=cacertfn)
        sslctx.load_cert_chain(certfn, None)
        cls.httpd.socket = sslctx.wrap_socket(cls.httpd.socket, server_side=True)
        cls.port = http_server_port(cls.httpd)
        cls.server_thread = threading.Thread(target=cls.httpd.serve_forever)
        cls.server_thread.daemon = True
        cls.server_thread.start()

    @classmethod
    def teardown_class(cls):
        # Stop the server started in setup_class and release its socket
        cls.httpd.shutdown()
        cls.httpd.server_close()

    def _run_test(self, handler, **handler_kwargs):
        """Request a page with a handler created from `handler_kwargs`; passes if nothing raises"""
        with handler(
            # Disable client-side validation of unacceptable self-signed testcert.pem
            # The test is of a check on the server side, so unaffected
            verify=False,
            **handler_kwargs,
        ) as rh:
            res = validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html'))
            try:
                # Read the whole body to make sure the request fully went through
                res.read()
            finally:
                res.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_combined_nopass(self, handler):
        self._run_test(handler, client_cert={
            'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
        })

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_nocombined_nopass(self, handler):
        self._run_test(handler, client_cert={
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'client.key'),
        })

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_combined_pass(self, handler):
        self._run_test(handler, client_cert={
            'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
            'client_certificate_password': 'foobar',
        })

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_nocombined_pass(self, handler):
        self._run_test(handler, client_cert={
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'),
            'client_certificate_password': 'foobar',
        })
753
754
class TestUrllibRequestHandler(TestRequestHandlerBase):
    """Tests which only apply to the urllib request handler"""

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_file_urls(self, handler):
        # See https://github.com/ytdl-org/youtube-dl/issues/8227
        tf = tempfile.NamedTemporaryFile(delete=False)
        try:
            tf.write(b'foobar')
            tf.close()
            req = Request(pathlib.Path(tf.name).as_uri())
            with handler() as rh:
                with pytest.raises(UnsupportedRequest):
                    rh.validate(req)

                # Test that urllib never loaded FileHandler
                with pytest.raises(TransportError):
                    rh.send(req)

            with handler(enable_file_urls=True) as rh:
                res = validate_and_send(rh, req)
                assert res.read() == b'foobar'
                res.close()
        finally:
            # Remove the temporary file even if the test failed
            tf.close()
            os.unlink(tf.name)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_error_returns_content(self, handler):
        # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
        def get_response():
            # Only the response leaves this function; the HTTPError object itself is dropped
            with handler() as rh:
                # headers url
                try:
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
                except HTTPError as e:
                    return e.response

        assert get_response().read() == b'<html></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert_error_text(self, handler):
        # Check the output of the error message
        with handler() as rh:
            with pytest.raises(
                CertificateVerifyError,
                match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
            ):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('req,match,version_check', [
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
        # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
        (
            Request('http://127.0.0.1', method='GET\n'),
            'method can\'t contain control characters',
            lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
        # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
        (
            Request('http://127.0.0. 1', method='GET'),
            'URL can\'t contain control characters',
            lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
        (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
    ])
    def test_httplib_validation_errors(self, handler, req, match, version_check):
        # version_check (if given) returns True for Python versions lacking the validation being tested
        if version_check and version_check(sys.version_info):
            pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')

        with handler() as rh:
            with pytest.raises(RequestError, match=match) as exc_info:
                validate_and_send(rh, req)
            assert not isinstance(exc_info.value, TransportError)
828
829
class TestRequestsRequestHandler(TestRequestHandlerBase):
    """Checks how requests/urllib3 exceptions are translated by the Requests handler."""

    @pytest.mark.parametrize('raised,expected', [
        (lambda: requests.exceptions.ConnectTimeout(), TransportError),
        (lambda: requests.exceptions.ReadTimeout(), TransportError),
        (lambda: requests.exceptions.Timeout(), TransportError),
        (lambda: requests.exceptions.ConnectionError(), TransportError),
        (lambda: requests.exceptions.ProxyError(), ProxyError),
        (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
        (lambda: requests.exceptions.SSLError(), SSLError),
        (lambda: requests.exceptions.InvalidURL(), RequestError),
        (lambda: requests.exceptions.InvalidHeader(), RequestError),
        # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
        (lambda: urllib3.exceptions.HTTPError(), TransportError),
        (lambda: requests.exceptions.RequestException(), RequestError)
        # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
    ])
    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
    def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
        """An exception raised while sending surfaces as exactly `expected`."""
        class RaisingSession:
            # Stand-in session whose `request` always raises the exception under test
            def request(self, *args, **kwargs):
                raise raised()

        with handler() as rh:
            monkeypatch.setattr(rh, '_get_instance', lambda *args, **kwargs: RaisingSession())

            with pytest.raises(expected) as exc_info:
                rh.send(Request('http://fake'))

            # Exact type, not merely a subclass of it
            assert exc_info.type is expected

    @pytest.mark.parametrize('raised,expected,match', [
        (lambda: urllib3.exceptions.SSLError(), SSLError, None),
        (lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
        (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
        (lambda: urllib3.exceptions.ProtocolError(), TransportError, None),
        (lambda: urllib3.exceptions.DecodeError(), TransportError, None),
        (lambda: urllib3.exceptions.HTTPError(), TransportError, None),  # catch-all
        (
            lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)),
            IncompleteRead,
            '3 bytes read, 4 more expected'
        ),
        (
            lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)),
            IncompleteRead,
            '3 bytes read, 5 more expected'
        ),
    ])
    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
    def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
        """An exception raised while reading the body surfaces as exactly `expected`."""
        from requests.models import Response as RequestsResponse
        from urllib3.response import HTTPResponse as Urllib3Response

        from yt_dlp.networking._requests import RequestsResponseAdapter

        wrapped = RequestsResponse()
        wrapped.raw = Urllib3Response(body=b'', status=200)
        adapter = RequestsResponseAdapter(wrapped)

        def raising_read(*args, **kwargs):
            raise raised()

        monkeypatch.setattr(adapter.fp, 'read', raising_read)

        with pytest.raises(expected, match=match) as exc_info:
            adapter.read()

        # Exact type, not merely a subclass of it
        assert exc_info.type is expected
898
899
def run_validation(handler, error, req, **handler_kwargs):
    """Validate `req` with a handler built from `handler(**handler_kwargs)`.

    When `error` is truthy it is the exception type that `validate` is
    required to raise; otherwise validation must complete without raising.
    """
    with handler(**handler_kwargs) as rh:
        if not error:
            rh.validate(req)
            return
        with pytest.raises(error):
            rh.validate(req)
907
908
class TestRequestHandlerValidation:
    """Table-driven checks of what each request handler accepts in `validate()`.

    In the tables below an expectation of `False` means validation must
    succeed; any other value is the exception type that must be raised.
    """

    class ValidationRH(RequestHandler):
        # Handler with default validation whose send step always fails
        def _send(self, request):
            raise RequestError('test')

    class NoCheckRH(ValidationRH):
        # Every supported-* list is None and all extensions are discarded
        _SUPPORTED_FEATURES = None
        _SUPPORTED_PROXY_SCHEMES = None
        _SUPPORTED_URL_SCHEMES = None

        def _check_extensions(self, extensions):
            extensions.clear()

    class HTTPSupportedRH(ValidationRH):
        _SUPPORTED_URL_SCHEMES = ('http',)

    URL_SCHEME_TESTS = [
        # scheme, expected to fail, handler kwargs
        ('Urllib', [
            ('http', False, {}),
            ('https', False, {}),
            ('data', False, {}),
            ('ftp', False, {}),
            ('file', UnsupportedRequest, {}),
            ('file', False, {'enable_file_urls': True}),
        ]),
        ('Requests', [
            ('http', False, {}),
            ('https', False, {}),
        ]),
        ('Websockets', [
            ('ws', False, {}),
            ('wss', False, {}),
        ]),
        (NoCheckRH, [('http', False, {})]),
        (ValidationRH, [('http', UnsupportedRequest, {})])
    ]

    # Each handler appears once; repeated rows would only generate
    # duplicate parametrized cases.
    PROXY_SCHEME_TESTS = [
        # scheme, expected to fail
        ('Urllib', 'http', [
            ('http', False),
            ('https', UnsupportedRequest),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
            ('socks', UnsupportedRequest),
        ]),
        ('Requests', 'http', [
            ('http', False),
            ('https', False),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
        ]),
        (NoCheckRH, 'http', [('http', False)]),
        (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
        ('Websockets', 'ws', [('http', UnsupportedRequest)]),
    ]

    PROXY_KEY_TESTS = [
        # key, expected to fail
        ('Urllib', [
            ('all', False),
            ('unrelated', False),
        ]),
        ('Requests', [
            ('all', False),
            ('unrelated', False),
        ]),
        (NoCheckRH, [('all', False)]),
        (HTTPSupportedRH, [('all', UnsupportedRequest)]),
        (HTTPSupportedRH, [('no', UnsupportedRequest)]),
    ]

    EXTENSION_TESTS = [
        # extensions, expected to fail
        ('Urllib', 'http', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'cookiejar': CookieJar()}, AssertionError),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        ('Requests', 'http', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        (NoCheckRH, 'http', [
            ({'cookiejar': 'notacookiejar'}, False),
            ({'somerandom': 'test'}, False),  # but any extension is allowed through
        ]),
        ('Websockets', 'ws', [
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'timeout': 2}, False),
        ]),
    ]

    @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
        (handler_tests[0], scheme, fail, handler_kwargs)
        for handler_tests in URL_SCHEME_TESTS
        for scheme, fail, handler_kwargs in handler_tests[1]
    ], indirect=['handler'])
    def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
        """Validate a bare `<scheme>://` URL against each handler."""
        run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))

    @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False)], indirect=['handler'])
    def test_no_proxy(self, handler, fail):
        """A `no` proxy entry is checked both on the request and on the handler."""
        run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
        run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})

    @pytest.mark.parametrize('handler,proxy_key,fail', [
        (handler_tests[0], proxy_key, fail)
        for handler_tests in PROXY_KEY_TESTS
        for proxy_key, fail in handler_tests[1]
    ], indirect=['handler'])
    def test_proxy_key(self, handler, proxy_key, fail):
        """Proxy keys are checked both on the request and on the handler."""
        run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
        run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})

    @pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
        (handler_tests[0], handler_tests[1], scheme, fail)
        for handler_tests in PROXY_SCHEME_TESTS
        for scheme, fail in handler_tests[2]
    ], indirect=['handler'])
    def test_proxy_scheme(self, handler, req_scheme, scheme, fail):
        """Proxy URL schemes are checked both on the request and on the handler."""
        run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
        run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})

    @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests'], indirect=True)
    def test_empty_proxy(self, handler):
        """A proxy value of None must validate, on the request and on the handler."""
        run_validation(handler, False, Request('http://', proxies={'http': None}))
        run_validation(handler, False, Request('http://'), proxies={'http': None})

    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_invalid_proxy_url(self, handler, proxy_url):
        """Proxy URLs lacking a scheme must be rejected with UnsupportedRequest."""
        run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))

    @pytest.mark.parametrize('handler,scheme,extensions,fail', [
        (handler_tests[0], handler_tests[1], extensions, fail)
        for handler_tests in EXTENSION_TESTS
        for extensions, fail in handler_tests[2]
    ], indirect=['handler'])
    def test_extension(self, handler, scheme, extensions, fail):
        """Validate a request carrying the given extensions against each handler."""
        run_validation(
            handler, fail, Request(f'{scheme}://', extensions=extensions))

    def test_invalid_request_type(self):
        """Both `validate` and `send` reject an argument that is not a Request."""
        rh = self.ValidationRH(logger=FakeLogger())
        for method in (rh.validate, rh.send):
            with pytest.raises(TypeError, match='Expected an instance of Request'):
                method('not a request')
1071
1072
class FakeResponse(Response):
    """Empty-bodied response that remembers the request it was created for."""

    def __init__(self, request):
        # XXX: we could make request part of standard response interface
        self.request = request
        empty_body = io.BytesIO(b'')
        super().__init__(fp=empty_body, headers={}, url=request.url)
1078
1079
class FakeRH(RequestHandler):
    """Handler that skips validation and answers every request with a FakeResponse.

    URLs starting with `ssl://` raise an SSLError whose message is the
    remainder of the URL instead.
    """

    def _validate(self, request):
        return

    def _send(self, request: Request):
        ssl_prefix = 'ssl://'
        url = request.url
        if not url.startswith(ssl_prefix):
            return FakeResponse(request)
        raise SSLError(url[len(ssl_prefix):])
1089
1090
class FakeRHYDL(FakeYDL):
    """FakeYDL whose request director is built with FakeRH as its only handler."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        only_fake_handler = [FakeRH]
        self._request_director = self.build_request_director(only_fake_handler)
1095
1096
class AllUnsupportedRHYDL(FakeYDL):
    """FakeYDL whose only handler declares no supported features, proxy or URL schemes."""

    def __init__(self, *args, **kwargs):

        class UnsupportedRH(RequestHandler):
            # Empty tuples: nothing is declared as supported
            _SUPPORTED_FEATURES = ()
            _SUPPORTED_PROXY_SCHEMES = ()
            _SUPPORTED_URL_SCHEMES = ()

            def _send(self, request: Request):
                pass

        super().__init__(*args, **kwargs)
        only_unsupported_handler = [UnsupportedRH]
        self._request_director = self.build_request_director(only_unsupported_handler)
1111
1112
class TestRequestDirector:
    """Tests for handler registration, selection and error handling in RequestDirector."""

    def test_handler_operations(self):
        """Handlers are stored by RH_KEY: adding replaces, popping removes."""
        director = RequestDirector(logger=FakeLogger())
        handler = FakeRH(logger=FakeLogger())
        director.add_handler(handler)
        assert director.handlers.get(FakeRH.RH_KEY) is handler

        # Handler should overwrite
        handler2 = FakeRH(logger=FakeLogger())
        director.add_handler(handler2)
        assert director.handlers.get(FakeRH.RH_KEY) is not handler
        assert director.handlers.get(FakeRH.RH_KEY) is handler2
        assert len(director.handlers) == 1

        class AnotherFakeRH(FakeRH):
            pass
        director.add_handler(AnotherFakeRH(logger=FakeLogger()))
        assert len(director.handlers) == 2
        assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY

        director.handlers.pop(FakeRH.RH_KEY, None)
        assert director.handlers.get(FakeRH.RH_KEY) is None
        assert len(director.handlers) == 1

        # RequestErrors should passthrough
        with pytest.raises(SSLError):
            director.send(Request('ssl://something'))

    def test_send(self):
        """Sending fails with RequestError until a handler has been added."""
        director = RequestDirector(logger=FakeLogger())
        with pytest.raises(RequestError):
            director.send(Request('any://'))
        director.add_handler(FakeRH(logger=FakeLogger()))
        assert isinstance(director.send(Request('http://')), FakeResponse)

    def test_unsupported_handlers(self):
        """Requests a handler does not support go to another handler, or fail."""
        class SupportedRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        director = RequestDirector(logger=FakeLogger())
        director.add_handler(SupportedRH(logger=FakeLogger()))
        director.add_handler(FakeRH(logger=FakeLogger()))

        # First should take preference
        assert director.send(Request('http://')).read() == b'supported'
        assert director.send(Request('any://')).read() == b''

        director.handlers.pop(FakeRH.RH_KEY)
        with pytest.raises(NoSupportingHandlers):
            director.send(Request('any://'))

    def test_unexpected_error(self):
        """A non-RequestError raised by a handler is counted, not propagated."""
        director = RequestDirector(logger=FakeLogger())

        class UnexpectedRH(FakeRH):
            def _send(self, request: Request):
                raise TypeError('something')

        # Pass a logger instance (not the class), as everywhere else in this file
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
            director.send(Request('any://'))

        director.handlers.clear()
        assert len(director.handlers) == 0

        # Should not be fatal
        director.add_handler(FakeRH(logger=FakeLogger()))
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        assert director.send(Request('any://'))

    def test_preference(self):
        """A registered preference function decides which handler serves a request."""
        director = RequestDirector(logger=FakeLogger())
        director.add_handler(FakeRH(logger=FakeLogger()))

        class SomeRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        def some_preference(rh, request):
            # 0 for other handlers; SomeRH gets 100 with a 'prefer' header, else -1
            return (0 if not isinstance(rh, SomeRH)
                    else 100 if 'prefer' in request.headers
                    else -1)

        director.add_handler(SomeRH(logger=FakeLogger()))
        director.preferences.add(some_preference)

        assert director.send(Request('http://')).read() == b''
        assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
1207
1208
1209 # XXX: do we want to move this to test_YoutubeDL.py?
class TestYoutubeDLNetworking:
    """Tests for how YoutubeDL builds request handlers and pre-processes requests."""

    @staticmethod
    def build_handler(ydl, handler: RequestHandler = FakeRH):
        """Build a director containing only `handler` and return the handler instance."""
        return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)

    @staticmethod
    def _restore_env(key, previous):
        """Put environment variable `key` back to `previous` (None means it was unset)."""
        if previous is None:
            os.environ.pop(key, None)
        else:
            os.environ[key] = previous

    def test_compat_opener(self):
        """The `_opener` attribute is a urllib OpenerDirector."""
        with FakeYDL() as ydl:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                assert isinstance(ydl._opener, urllib.request.OpenerDirector)

    @pytest.mark.parametrize('proxy,expected', [
        ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
        ('', {'all': '__noproxy__'}),
        (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'})  # env, set https
    ])
    def test_proxy(self, proxy, expected):
        """The `proxy` param takes priority over the HTTP_PROXY environment variable."""
        old_http_proxy = os.environ.get('HTTP_PROXY')
        try:
            os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081'  # ensure that provided proxies override env
            with FakeYDL({'proxy': proxy}) as ydl:
                assert ydl.proxies == expected
        finally:
            # Also removes the variable when it was not set before the test,
            # so the test value cannot leak into later tests
            self._restore_env('HTTP_PROXY', old_http_proxy)

    def test_compat_request(self):
        """urlopen accepts a URL string or a urllib Request, and rejects None."""
        with FakeRHYDL() as ydl:
            assert ydl.urlopen('test://')
            urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
            urllib_req.add_unredirected_header('Cookie', 'bob=bob')
            urllib_req.timeout = 2
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                req = ydl.urlopen(urllib_req).request
                assert req.url == urllib_req.get_full_url()
                assert req.data == urllib_req.data
                assert req.method == urllib_req.get_method()
                assert 'X-Test' in req.headers
                assert 'Cookie' in req.headers
                assert req.extensions.get('timeout') == 2

            with pytest.raises(AssertionError):
                ydl.urlopen(None)

    def test_extract_basic_auth(self):
        """Credentials embedded in the URL become a Basic Authorization header."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('http://user:pass@foo.bar'))
            assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'

    def test_sanitize_url(self):
        """The mistyped scheme `httpss` is corrected to `https`."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('httpss://foo.bar'))
            assert res.request.url == 'https://foo.bar'

    def test_file_urls_error(self):
        """Opening a file:// URL with default settings reports that they are disabled."""
        # use urllib handler
        with FakeYDL() as ydl:
            with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
                ydl.urlopen('file://')

    @pytest.mark.parametrize('scheme', (['ws', 'wss']))
    def test_websocket_unavailable_error(self, scheme):
        """ws/wss URLs with no supporting handler report missing WebSocket support."""
        with AllUnsupportedRHYDL() as ydl:
            with pytest.raises(RequestError, match=r'This request requires WebSocket support'):
                ydl.urlopen(f'{scheme}://')

    def test_legacy_server_connect_error(self):
        """Certain SSL errors gain a hint to use --legacy-server-connect; others do not."""
        with FakeRHYDL() as ydl:
            for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
                    ydl.urlopen(f'ssl://{error}')

            with pytest.raises(SSLError, match='testerror'):
                ydl.urlopen('ssl://testerror')

    @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
        ('http', '__noproxy__', None),
        ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
        ('https', 'example.com', 'http://example.com'),
        ('https', '//example.com', 'http://example.com'),
        ('https', 'socks5://example.com', 'socks5h://example.com'),
        ('http', 'socks://example.com', 'socks4://example.com'),
        ('http', 'socks4://example.com', 'socks4://example.com'),
        ('unrelated', '/bad/proxy', '/bad/proxy'),  # clean_proxies should ignore bad proxies
    ])
    def test_clean_proxy(self, proxy_key, proxy_url, expected):
        """Proxy values are normalised both per request and when building a handler."""
        # proxies should be cleaned in urlopen()
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
            assert req.proxies[proxy_key] == expected

        # and should also be cleaned when building the handler
        env_key = f'{proxy_key.upper()}_PROXY'
        old_env_proxy = os.environ.get(env_key)
        try:
            os.environ[env_key] = proxy_url  # ensure that provided proxies override env
            with FakeYDL() as ydl:
                rh = self.build_handler(ydl)
                assert rh.proxies[proxy_key] == expected
        finally:
            # Also removes the variable when it was not set before the test,
            # so the test value cannot leak into later tests
            self._restore_env(env_key, old_env_proxy)

    def test_clean_proxy_header(self):
        """The `ytdl-request-proxy` header is removed and turned into the `all` proxy."""
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
            assert 'ytdl-request-proxy' not in req.headers
            assert req.proxies == {'all': 'http://foo.bar'}

        with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'ytdl-request-proxy' not in rh.headers
            assert rh.proxies == {'all': 'http://foo.bar'}

    def test_clean_header(self):
        """Internal control headers are stripped from requests and handler headers."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
            assert 'Youtubedl-no-compression' not in res.request.headers
            assert res.request.headers.get('Accept-Encoding') == 'identity'

        with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'Youtubedl-no-compression' not in rh.headers
            assert rh.headers.get('Accept-Encoding') == 'identity'

        with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'Ytdl-socks-proxy' not in rh.headers

    def test_build_handler_params(self):
        """YoutubeDL params are passed on to the corresponding handler attributes."""
        with FakeYDL({
            'http_headers': {'test': 'testtest'},
            'socket_timeout': 2,
            'proxy': 'http://127.0.0.1:8080',
            'source_address': '127.0.0.45',
            'debug_printtraffic': True,
            'compat_opts': ['no-certifi'],
            'nocheckcertificate': True,
            'legacyserverconnect': True,
        }) as ydl:
            rh = self.build_handler(ydl)
            assert rh.headers.get('test') == 'testtest'
            assert 'Accept' in rh.headers  # ensure std_headers are still there
            assert rh.timeout == 2
            assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
            assert rh.source_address == '127.0.0.45'
            assert rh.verbose is True
            assert rh.prefer_system_certs is True
            assert rh.verify is False
            assert rh.legacy_ssl_support is True

    @pytest.mark.parametrize('ydl_params', [
        {'client_certificate': 'fakecert.crt'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
        {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
    ])
    def test_client_certificate(self, ydl_params):
        """Client certificate params end up unchanged in the handler's `_client_cert`."""
        with FakeYDL(ydl_params) as ydl:
            rh = self.build_handler(ydl)
            assert rh._client_cert == ydl_params  # XXX: Too bound to implementation

    def test_urllib_file_urls(self):
        """The `enable_file_urls` param is passed on to the urllib handler."""
        with FakeYDL({'enable_file_urls': False}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is False

        with FakeYDL({'enable_file_urls': True}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is True

    def test_compat_opt_prefer_urllib(self):
        """The `prefer-legacy-http-handler` compat option adds a preference for urllib."""
        # This assumes urllib only has a preference when this compat opt is given
        with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
            director = ydl.build_request_director([UrllibRH])
            assert len(director.preferences) == 1
            assert director.preferences.pop()(UrllibRH, None)
1389
1390
class TestRequest:
    """Tests for the attribute handling of the Request object."""

    def test_query(self):
        """Query parameters are merged into the URL, replacing existing keys."""
        request = Request('http://example.com?q=something', query={'v': 'xyz'})
        assert request.url == 'http://example.com?q=something&v=xyz'

        request.update(query={'v': '123'})
        assert request.url == 'http://example.com?q=something&v=123'
        request.update(url='http://example.com', query={'v': 'xyz'})
        assert request.url == 'http://example.com?v=xyz'

    def test_method(self):
        """The method follows the presence of data unless it was set explicitly."""
        request = Request('http://example.com')
        assert request.method == 'GET'
        request.data = b'test'
        assert request.method == 'POST'
        request.data = None
        assert request.method == 'GET'

        # Once set explicitly, the method no longer depends on the data
        request.data = b'test2'
        request.method = 'PUT'
        assert request.method == 'PUT'
        request.data = None
        assert request.method == 'PUT'

        with pytest.raises(TypeError):
            request.method = 1

    def test_request_helpers(self):
        """HEADRequest and PUTRequest preset their method."""
        head = HEADRequest('http://example.com')
        put = PUTRequest('http://example.com')
        assert head.method == 'HEAD'
        assert put.method == 'PUT'

    def test_headers(self):
        """Headers are stored in an HTTPHeaderDict."""
        request = Request('http://example.com', headers={'tesT': 'test'})
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        request.update(headers={'teSt2': 'test2'})
        assert request.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})

        # An HTTPHeaderDict is stored as the very same object
        replacement = HTTPHeaderDict({'test': 'test'})
        request.headers = replacement
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        assert request.headers is replacement

        # test converts dict to case insensitive dict
        plain_dict = {'test2': 'test2'}
        request.headers = plain_dict
        assert isinstance(request.headers, HTTPHeaderDict)
        assert request.headers is not plain_dict

        with pytest.raises(TypeError):
            request.headers = None

    def test_data_type(self):
        """Bytes, iterables of bytes and file-like objects are accepted as data."""
        request = Request('http://example.com')
        assert request.data is None

        # test bytes is allowed
        request.data = b'test'
        assert request.data == b'test'

        # test iterable of bytes is allowed
        chunks = [b'test', b'test2']
        request.data = chunks
        assert request.data == chunks

        # test file-like object is allowed
        stream = io.BytesIO(b'test')
        request.data = stream
        assert request.data == stream

        # common mistake: test str not allowed
        with pytest.raises(TypeError):
            request.data = 'test'
        assert request.data != 'test'

        # common mistake: test dict is not allowed
        with pytest.raises(TypeError):
            request.data = {'test': 'test'}
        assert request.data != {'test': 'test'}

    def test_content_length_header(self):
        """Content-Length is dropped when data changes or when there is no data."""
        request = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
        assert request.headers.get('Content-Length') == '0'

        request.data = b'test'
        assert 'Content-Length' not in request.headers

        request = Request('http://example.com', headers={'Content-Length': '10'})
        assert 'Content-Length' not in request.headers

    def test_content_type_header(self):
        """Content-Type is kept while data is present and defaulted when data is added."""
        request = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
        assert request.headers.get('Content-Type') == 'test'

        request.data = b'test2'
        assert request.headers.get('Content-Type') == 'test'

        request.data = None
        assert 'Content-Type' not in request.headers

        request.data = b'test3'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_update_req(self):
        """update() with empty bytes still turns the request into a POST."""
        request = Request('http://example.com')
        assert request.data is None
        assert request.method == 'GET'
        assert 'Content-Type' not in request.headers

        # Test that zero-byte payloads will be sent
        request.update(data=b'')
        assert request.data == b''
        assert request.method == 'POST'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_proxies(self):
        """Proxies passed to the constructor are readable back unchanged."""
        proxies = {'http': 'http://127.0.0.1:8080'}
        request = Request(url='http://example.com', proxies=proxies)
        assert request.proxies == {'http': 'http://127.0.0.1:8080'}

    def test_extensions(self):
        """Extensions passed to the constructor are readable back unchanged."""
        extensions = {'timeout': 2}
        request = Request(url='http://example.com', extensions=extensions)
        assert request.extensions == {'timeout': 2}

    def test_copy(self):
        """copy() duplicates headers, proxies and extensions but shares the data."""
        original = Request(
            url='http://example.com',
            extensions={'cookiejar': CookieJar()},
            headers={'Accept-Encoding': 'br'},
            proxies={'http': 'http://127.0.0.1'},
            data=[b'123']
        )
        duplicate = original.copy()
        assert duplicate is not original
        assert duplicate.url == original.url
        assert duplicate.headers == original.headers
        assert duplicate.headers is not original.headers
        assert duplicate.proxies == original.proxies
        assert duplicate.proxies is not original.proxies

        # Data is not able to be copied
        assert duplicate.data == original.data
        assert duplicate.data is original.data

        # Shallow copy extensions
        assert duplicate.extensions is not original.extensions
        assert duplicate.extensions['cookiejar'] == original.extensions['cookiejar']

        # Subclasses are copied by default
        class AnotherRequest(Request):
            pass

        subclassed = AnotherRequest(url='http://127.0.0.1')
        assert isinstance(subclassed.copy(), AnotherRequest)

    def test_url(self):
        """URLs are normalised on assignment and must be strings."""
        request = Request(url='https://фtest.example.com/ some spaceв?ä=c')
        assert request.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'

        scheme_less = Request(url='//example.com')
        assert scheme_less.url == 'http://example.com'

        with pytest.raises(TypeError):
            Request(url='https://').url = None
1543
1544
class TestResponse:
    """Tests for the Response object."""

    @pytest.mark.parametrize('reason,status,expected', [
        ('custom', 200, 'custom'),
        (None, 404, 'Not Found'),  # fallback status
        ('', 403, 'Forbidden'),
        (None, 999, None)
    ])
    def test_reason(self, reason, status, expected):
        """A missing or empty reason falls back to the phrase for the status code."""
        body = io.BytesIO(b'')
        response = Response(body, url='test://', headers={}, status=status, reason=reason)
        assert response.reason == expected

    def test_headers(self):
        """Repeated headers are all kept and lookups ignore case."""
        message = Message()
        for name, value in (('Test', 'test'), ('Test', 'test2'), ('content-encoding', 'br')):
            message.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=message, url='test://')
        assert response.headers.get_all('test') == ['test', 'test2']
        assert 'Content-Encoding' in response.headers

    def test_get_header(self):
        """get_header joins repeated values, except Set-Cookie which yields the first."""
        message = Message()
        for name, value in (
            ('Set-Cookie', 'cookie1'),
            ('Set-cookie', 'cookie2'),
            ('Test', 'test'),
            ('Test', 'test2'),
        ):
            message.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=message, url='test://')
        assert response.get_header('test') == 'test, test2'
        assert response.get_header('set-Cookie') == 'cookie1'
        assert response.get_header('notexist', 'default') == 'default'

    def test_compat(self):
        """Legacy accessors agree with the current attributes."""
        response = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
        with warnings.catch_warnings():
            # Accessing the legacy names may emit DeprecationWarning
            warnings.simplefilter('ignore', category=DeprecationWarning)
            assert response.code == response.getcode() == response.status
            assert response.geturl() == response.url
            assert response.info() is response.headers
            assert response.getheader('test') == response.get_header('test')