jfr.im git - yt-dlp.git/blob - test/test_networking.py
[rh:requests] Add handler for `requests` HTTP library (#3668)
[yt-dlp.git] / test / test_networking.py
1 #!/usr/bin/env python3
2
3 # Allow direct execution
4 import os
5 import sys
6
7 import pytest
8
9 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
11 import gzip
12 import http.client
13 import http.cookiejar
14 import http.server
15 import io
16 import pathlib
17 import random
18 import ssl
19 import tempfile
20 import threading
21 import time
22 import urllib.error
23 import urllib.request
24 import warnings
25 import zlib
26 from email.message import Message
27 from http.cookiejar import CookieJar
28
29 from test.helper import FakeYDL, http_server_port
30 from yt_dlp.cookies import YoutubeDLCookieJar
31 from yt_dlp.dependencies import brotli, requests, urllib3
32 from yt_dlp.networking import (
33 HEADRequest,
34 PUTRequest,
35 Request,
36 RequestDirector,
37 RequestHandler,
38 Response,
39 )
40 from yt_dlp.networking._urllib import UrllibRH
41 from yt_dlp.networking.exceptions import (
42 CertificateVerifyError,
43 HTTPError,
44 IncompleteRead,
45 NoSupportingHandlers,
46 ProxyError,
47 RequestError,
48 SSLError,
49 TransportError,
50 UnsupportedRequest,
51 )
52 from yt_dlp.utils._utils import _YDLLogger as FakeLogger
53 from yt_dlp.utils.networking import HTTPHeaderDict
54
55 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
56
57
58 def _build_proxy_handler(name):
59 class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
60 proxy_name = name
61
62 def log_message(self, format, *args):
63 pass
64
65 def do_GET(self):
66 self.send_response(200)
67 self.send_header('Content-Type', 'text/plain; charset=utf-8')
68 self.end_headers()
69 self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode())
70 return HTTPTestRequestHandler
71
72
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    """Request handler used by the local HTTP and HTTPS test servers.

    Each request path maps to a canned response (echoed request headers,
    redirects, compressed bodies, deliberately broken responses, ...).
    Unknown paths are answered through ``_status(404)``.
    """
    protocol_version = 'HTTP/1.1'

    # Page body shared by several endpoints
    _VIDEO_PAGE = b'<html><video src="/vid.mp4" /></html>'

    def log_message(self, format, *args):
        pass

    # -- response helpers -------------------------------------------------

    def _send_payload(self, payload, content_type='text/html; charset=utf-8', status=200):
        """Send a complete response with ``payload`` as body and a matching Content-Length."""
        self.send_response(status)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _send_redirect(self, status, location):
        """Send a body-less response with ``status`` and a Location header."""
        self.send_response(status)
        self.send_header('Location', location)
        self.send_header('Content-Length', '0')
        self.end_headers()

    @staticmethod
    def _gzip_compress(payload):
        """Return ``payload`` compressed into a single gzip member."""
        buf = io.BytesIO()
        with gzip.GzipFile(fileobj=buf, mode='wb') as f:
            f.write(payload)
        return buf.getvalue()

    def _headers(self):
        """Echo the received request headers back as the response body."""
        self._send_payload(str(self.headers).encode(), content_type='application/json')

    def _redirect(self):
        """Redirect to /method using the status code embedded in ``/redirect_<code>``."""
        self._send_redirect(int(self.path[len('/redirect_'):]), '/method')

    def _method(self, method, payload=None):
        """Report the request method in a ``Method`` header, with ``payload`` (if any) as body."""
        self.send_response(200)
        self.send_header('Content-Length', str(len(payload or '')))
        self.send_header('Method', method)
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def _status(self, status):
        """Send a small HTML body with the given status code."""
        self._send_payload(f'<html>{status} NOT FOUND</html>'.encode(), status=int(status))

    def _read_data(self):
        """Return the request body, or ``b''`` when no Content-Length was sent.

        Always returning bytes lets callers concatenate the result directly;
        returning ``None`` here made do_POST/do_PUT fail with a TypeError for
        requests that carry no body.
        """
        if 'Content-Length' in self.headers:
            return self.rfile.read(int(self.headers['Content-Length']))
        return b''

    # -- HTTP verbs ---------------------------------------------------------

    def do_POST(self):
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('POST', data)
        elif self.path.startswith('/headers'):
            self._headers()
        else:
            self._status(404)

    def do_HEAD(self):
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('HEAD')
        else:
            self._status(404)

    def do_PUT(self):
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('PUT', data)
        else:
            self._status(404)

    def do_GET(self):
        path = self.path
        if path in ('/video.html', '/%E4%B8%AD%E6%96%87.html', '/%c7%9f'):
            self._send_payload(self._VIDEO_PAGE)
        elif path == '/vid.mp4':
            self._send_payload(b'\x00\x00\x00\x00\x20\x66\x74[video]', content_type='video/mp4')
        elif path.startswith('/redirect_loop'):
            # Redirects to itself
            self._send_redirect(301, path)
        elif path == '/redirect_dotsegments':
            # redirect to /headers but with dot segments before
            self._send_redirect(301, '/a/b/./../../headers')
        elif path.startswith('/redirect_'):
            self._redirect()
        elif path.startswith('/method'):
            self._method('GET', str(self.headers).encode())
        elif path.startswith('/headers'):
            self._headers()
        elif path.startswith('/308-to-headers'):
            self._send_redirect(308, '/headers')
        elif path == '/trailing_garbage':
            # Valid gzip stream followed by bytes that are not part of it
            compressed = self._gzip_compress(self._VIDEO_PAGE) + b'trailing garbage'
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.send_header('Content-Encoding', 'gzip')
            self.send_header('Content-Length', str(len(compressed)))
            self.end_headers()
            self.wfile.write(compressed)
        elif path == '/302-non-ascii-redirect':
            # The Location value is sent with raw (not percent-encoded) non-ASCII characters
            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
            self._send_redirect(301, new_url)
        elif path == '/content-encoding':
            # The encodings to apply are requested through the 'ytdl-encoding' header
            encodings = self.headers.get('ytdl-encoding', '')
            payload = self._VIDEO_PAGE
            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
                if encoding == 'br' and brotli:
                    payload = brotli.compress(payload)
                elif encoding == 'gzip':
                    payload = self._gzip_compress(payload)
                elif encoding == 'deflate':
                    payload = zlib.compress(payload)
                elif encoding == 'unsupported':
                    payload = b'raw'
                    break
                else:
                    self._status(415)
                    return
            self.send_response(200)
            self.send_header('Content-Encoding', encodings)
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)
        elif path.startswith('/gen_'):
            # Respond with the status code embedded in the path
            self._send_payload(b'<html></html>', status=int(path[len('/gen_'):]))
        elif path.startswith('/incompleteread'):
            # Announce far more bytes than are actually written
            payload = b'<html></html>'
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.send_header('Content-Length', '234234')
            self.end_headers()
            self.wfile.write(payload)
            self.finish()
        elif path.startswith('/timeout_'):
            # Sleep for the number of seconds embedded in the path before answering
            time.sleep(int(path[len('/timeout_'):]))
            self._headers()
        elif path == '/source_address':
            # Report the address the client connected from
            self._send_payload(str(self.client_address[0]).encode())
            self.finish()
        else:
            self._status(404)

    def send_header(self, keyword, value):
        """
        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
        This is against what is defined in RFC 3986, however we need to test we support this
        since some sites incorrectly do this.
        """
        if keyword.lower() == 'connection':
            return super().send_header(keyword, value)

        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []

        self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
276
277
def validate_and_send(rh, req):
    """Run ``rh.validate(req)`` and then return the result of ``rh.send(req)``."""
    rh.validate(req)
    response = rh.send(req)
    return response
281
282
class TestRequestHandlerBase:
    """Base class that starts one local HTTP and one local HTTPS test server per class."""

    @classmethod
    def setup_class(cls):
        def serve_in_background(server):
            # Run the server loop on a daemon thread and hand the thread back
            worker = threading.Thread(target=server.serve_forever)
            # FIXME: we should probably stop the http server thread after each test
            # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
            worker.daemon = True
            worker.start()
            return worker

        bind_address = ('127.0.0.1', 0)

        # HTTP server
        cls.http_httpd = http.server.ThreadingHTTPServer(bind_address, HTTPTestRequestHandler)
        cls.http_port = http_server_port(cls.http_httpd)
        cls.http_server_thread = serve_in_background(cls.http_httpd)

        # HTTPS server, presenting the certificate stored next to this file
        cls.https_httpd = http.server.ThreadingHTTPServer(bind_address, HTTPTestRequestHandler)
        tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        tls_context.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None)
        cls.https_httpd.socket = tls_context.wrap_socket(cls.https_httpd.socket, server_side=True)
        cls.https_port = http_server_port(cls.https_httpd)
        cls.https_server_thread = serve_in_background(cls.https_httpd)
306
307
class TestHTTPRequestHandler(TestRequestHandlerBase):
    """HTTP(S) behaviour tests, parametrized over the 'Urllib' and 'Requests' handlers."""

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_verify_cert(self, handler):
        target = f'https://127.0.0.1:{self.https_port}/headers'

        # With default handler settings, certificate verification must fail
        with handler() as rh, pytest.raises(CertificateVerifyError):
            validate_and_send(rh, Request(target))

        # With verification disabled, the same request goes through
        with handler(verify=False) as rh:
            response = validate_and_send(rh, Request(target))
            assert response.status == 200
            response.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_ssl_error(self, handler):
        # Dedicated HTTPS server whose TLS context has no certificate chain loaded
        # XXX: is there a better way to test this than to create a new server?
        server = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
        tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        server.socket = tls_context.wrap_socket(server.socket, server_side=True)
        port = http_server_port(server)
        worker = threading.Thread(target=server.serve_forever)
        worker.daemon = True
        worker.start()

        with handler(verify=False) as rh:
            with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
                validate_and_send(rh, Request(f'https://127.0.0.1:{port}/headers'))
            # Must be a plain SSLError, not the certificate-verification subclass
            assert not issubclass(exc_info.type, CertificateVerifyError)

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_percent_encode(self, handler):
        paths = (
            # Unicode characters should be encoded with uppercase percent-encoding
            '/中文.html',
            # don't normalize existing percent encodings
            '/%c7%9f',
        )
        with handler() as rh:
            for path in paths:
                res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
                assert res.status == 200
                res.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_remove_dot_segments(self, handler):
        origin = f'http://127.0.0.1:{self.http_port}'
        expected_url = f'{origin}/headers'
        with handler() as rh:
            # This isn't a comprehensive test,
            # but it should be enough to check whether the handler is removing dot segments
            # (first in the request URL itself, then in a redirect target)
            for path in ('/a/b/./../../headers', '/redirect_dotsegments'):
                res = validate_and_send(rh, Request(f'{origin}{path}'))
                assert res.status == 200
                assert res.url == expected_url
                res.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_unicode_path_redirection(self, handler):
        origin = f'http://127.0.0.1:{self.http_port}'
        with handler() as rh:
            response = validate_and_send(rh, Request(f'{origin}/302-non-ascii-redirect'))
            assert response.url == f'{origin}/%E4%B8%AD%E6%96%87.html'
            response.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_raise_http_error(self, handler):
        origin = f'http://127.0.0.1:{self.http_port}'
        with handler() as rh:
            for bad_status in (400, 500, 599, 302):
                with pytest.raises(HTTPError):
                    validate_and_send(rh, Request(f'{origin}/gen_{bad_status}'))

            # Should not raise an error
            validate_and_send(rh, Request(f'{origin}/gen_200')).close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_response_url(self, handler):
        origin = f'http://127.0.0.1:{self.http_port}'
        with handler() as rh:
            # Response url should be that of the last url in redirect chain
            redirected = validate_and_send(rh, Request(f'{origin}/redirect_301'))
            assert redirected.url == f'{origin}/method'
            redirected.close()

            direct = validate_and_send(rh, Request(f'{origin}/gen_200'))
            assert direct.url == f'{origin}/gen_200'
            direct.close()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_redirect(self, handler):
        with handler() as rh:
            def do_req(redirect_status, method, assert_no_content=False):
                """Follow /redirect_<status> and return (echoed request body, final method)."""
                body = b'testdata' if method in ('POST', 'PUT') else None
                res = validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=body))

                # /method replies with the request body (if any) followed by the request headers.
                # If the first bytes are not our body, they already belong to the headers.
                echoed_body = b''
                echoed_headers = b''
                if body is not None:
                    leading = res.read(len(body))
                    if leading == body:
                        echoed_body = leading
                    else:
                        echoed_headers = leading

                echoed_headers += res.read()

                if body is not None and not assert_no_content:
                    assert b'Content-Type' in echoed_headers
                    assert b'Content-Length' in echoed_headers
                else:
                    assert b'Content-Type' not in echoed_headers
                    assert b'Content-Length' not in echoed_headers

                return echoed_body.decode(), res.headers.get('method', '')

            # (redirect status, request method, assert_no_content, expected result)
            expectations = (
                # A 303 must either use GET or HEAD for subsequent request
                (303, 'POST', True, ('', 'GET')),
                (303, 'HEAD', False, ('', 'HEAD')),
                (303, 'PUT', True, ('', 'GET')),
                # 301 and 302 turn POST only into a GET
                (301, 'POST', True, ('', 'GET')),
                (301, 'HEAD', False, ('', 'HEAD')),
                (302, 'POST', True, ('', 'GET')),
                (302, 'HEAD', False, ('', 'HEAD')),
                (301, 'PUT', False, ('testdata', 'PUT')),
                (302, 'PUT', False, ('testdata', 'PUT')),
                # 307 and 308 should not change method
                (307, 'POST', False, ('testdata', 'POST')),
                (308, 'POST', False, ('testdata', 'POST')),
                (307, 'PUT', False, ('testdata', 'PUT')),
                (308, 'PUT', False, ('testdata', 'PUT')),
                (307, 'HEAD', False, ('', 'HEAD')),
                (308, 'HEAD', False, ('', 'HEAD')),
            )
            for status, method, no_content, expected in expectations:
                assert do_req(status, method, no_content) == expected

            # These should not redirect and instead raise an HTTPError
            for code in (300, 304, 305, 306):
                with pytest.raises(HTTPError):
                    do_req(code, 'GET')

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_request_cookie_header(self, handler):
        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
        origin = f'http://127.0.0.1:{self.http_port}'
        with handler() as rh:
            # Specified Cookie header should be used
            request = Request(f'{origin}/headers', headers={'Cookie': 'test=test'})
            echoed = validate_and_send(rh, request).read().decode()
            assert 'Cookie: test=test' in echoed

            # Specified Cookie header should be removed on any redirect
            request = Request(f'{origin}/308-to-headers', headers={'Cookie': 'test=test'})
            echoed = validate_and_send(rh, request).read().decode()
            assert 'Cookie: test=test' not in echoed

        # Specified Cookie header should override global cookiejar for that request
        jar_cookie = http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp', port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
            path_specified=True, secure=False, expires=None, discard=False, comment=None,
            comment_url=None, rest={})
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(jar_cookie)

        with handler(cookiejar=cookiejar) as rh:
            request = Request(f'{origin}/headers', headers={'cookie': 'test=test'})
            data = validate_and_send(rh, request).read()
            assert b'Cookie: test=ytdlp' not in data
            assert b'Cookie: test=test' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_redirect_loop(self, handler):
        with handler() as rh, pytest.raises(HTTPError, match='redirect loop'):
            validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_incompleteread(self, handler):
        with handler(timeout=2) as rh, pytest.raises(IncompleteRead):
            validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_cookies(self, handler):
        headers_url = f'http://127.0.0.1:{self.http_port}/headers'
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp', port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/headers',
            path_specified=True, secure=False, expires=None, discard=False, comment=None,
            comment_url=None, rest={}))

        # Handler-wide cookiejar
        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(rh, Request(headers_url)).read()
            assert b'Cookie: test=ytdlp' in data

        # Per request
        with handler() as rh:
            data = validate_and_send(rh, Request(headers_url, extensions={'cookiejar': cookiejar})).read()
            assert b'Cookie: test=ytdlp' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_headers(self, handler):
        headers_url = f'http://127.0.0.1:{self.http_port}/headers'
        global_headers = HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})

        with handler(headers=global_headers) as rh:
            # Global Headers
            data = validate_and_send(rh, Request(headers_url)).read()
            assert b'Test1: test' in data

            # Per request headers, merged with global
            request = Request(headers_url, headers={'test2': 'changed', 'test3': 'test3'})
            data = validate_and_send(rh, request).read()
            assert b'Test1: test' in data
            assert b'Test2: changed' in data
            assert b'Test2: test2' not in data
            assert b'Test3: test3' in data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_timeout(self, handler):
        origin = f'http://127.0.0.1:{self.http_port}'
        with handler() as rh:
            # Default timeout is 20 seconds, so this should go through
            validate_and_send(rh, Request(f'{origin}/timeout_3'))

        with handler(timeout=0.5) as rh:
            with pytest.raises(TransportError):
                validate_and_send(rh, Request(f'{origin}/timeout_1'))

            # Per request timeout, should override handler timeout
            validate_and_send(rh, Request(f'{origin}/timeout_1', extensions={'timeout': 4}))

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_source_address(self, handler):
        # Random address in 127.0.0.x; the server reports back the address it saw
        source_address = f'127.0.0.{random.randint(5, 255)}'
        with handler(source_address=source_address) as rh:
            response = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/source_address'))
            data = response.read().decode()
            assert source_address == data

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_gzip_trailing_garbage(self, handler):
        with handler() as rh:
            response = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage'))
            data = response.read().decode()
            assert data == '<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
    def test_brotli(self, handler):
        request = Request(
            f'http://127.0.0.1:{self.http_port}/content-encoding',
            headers={'ytdl-encoding': 'br'})
        with handler() as rh:
            res = validate_and_send(rh, request)
            assert res.headers.get('Content-Encoding') == 'br'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_deflate(self, handler):
        request = Request(
            f'http://127.0.0.1:{self.http_port}/content-encoding',
            headers={'ytdl-encoding': 'deflate'})
        with handler() as rh:
            res = validate_and_send(rh, request)
            assert res.headers.get('Content-Encoding') == 'deflate'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_gzip(self, handler):
        request = Request(
            f'http://127.0.0.1:{self.http_port}/content-encoding',
            headers={'ytdl-encoding': 'gzip'})
        with handler() as rh:
            res = validate_and_send(rh, request)
            assert res.headers.get('Content-Encoding') == 'gzip'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_multiple_encodings(self, handler):
        encoding_url = f'http://127.0.0.1:{self.http_port}/content-encoding'
        with handler() as rh:
            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
                res = validate_and_send(rh, Request(encoding_url, headers={'ytdl-encoding': pair}))
                assert res.headers.get('Content-Encoding') == pair
                assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_unsupported_encoding(self, handler):
        request = Request(
            f'http://127.0.0.1:{self.http_port}/content-encoding',
            headers={'ytdl-encoding': 'unsupported'})
        with handler() as rh:
            res = validate_and_send(rh, request)
            # The body is passed through untouched when the encoding is unknown
            assert res.headers.get('Content-Encoding') == 'unsupported'
            assert res.read() == b'raw'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_read(self, handler):
        with handler() as rh:
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
            assert res.readable()
            # Consecutive partial reads continue where the previous one stopped
            assert res.read(1) == b'H'
            assert res.read(3) == b'ost'
615
616
class TestHTTPProxy(TestRequestHandlerBase):
    """Proxy selection tests backed by two fake proxies labelled 'normal' and 'geo'."""

    @classmethod
    def setup_class(cls):
        super().setup_class()

        def launch_proxy(label):
            # Start a fake proxy on a free port and return (server, port, thread)
            server = http.server.ThreadingHTTPServer(('127.0.0.1', 0), _build_proxy_handler(label))
            port = http_server_port(server)
            worker = threading.Thread(target=server.serve_forever)
            worker.daemon = True
            worker.start()
            return server, port, worker

        # HTTP Proxy server
        cls.proxy, cls.proxy_port, cls.proxy_thread = launch_proxy('normal')

        # Geo proxy server
        cls.geo_proxy, cls.geo_port, cls.geo_proxy_thread = launch_proxy('geo')

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_http_proxy(self, handler):
        # Covers: global http proxy, per request http proxy,
        # and a per request setting that disables the proxy
        http_proxy = f'http://127.0.0.1:{self.proxy_port}'
        geo_proxy = f'http://127.0.0.1:{self.geo_port}'
        url = 'http://foo.com/bar'

        # Global HTTP proxy
        with handler(proxies={'http': http_proxy}) as rh:
            body = validate_and_send(rh, Request(url)).read().decode()
            assert body == f'normal: {url}'

            # Per request proxy overrides global
            body = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode()
            assert body == f'geo: {url}'

            # and setting to None disables all proxies for that request
            real_url = f'http://127.0.0.1:{self.http_port}/headers'
            body = validate_and_send(rh, Request(real_url, proxies={'http': None})).read().decode()
            assert body != f'normal: {real_url}'
            assert 'Accept' in body

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_noproxy(self, handler):
        headers_url = f'http://127.0.0.1:{self.http_port}/headers'
        with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
            # NO_PROXY
            for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
                request = Request(headers_url, proxies={'no': no_proxy})
                nop_response = validate_and_send(rh, request).read().decode('utf-8')
                assert 'Accept' in nop_response

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_allproxy(self, handler):
        url = 'http://foo.com/bar'
        with handler() as rh:
            request = Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})
            response = validate_and_send(rh, request).read().decode('utf-8')
            assert response == f'normal: {url}'

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_http_proxy_with_idn(self, handler):
        proxies = {
            'http': f'http://127.0.0.1:{self.proxy_port}',
        }
        with handler(proxies=proxies) as rh:
            url = 'http://中文.tw/'
            # Sent directly through rh.send(), without a prior rh.validate()
            response = rh.send(Request(url)).read().decode()
            # b'xn--fiq228c' is '中文'.encode('idna')
            assert response == 'normal: http://xn--fiq228c.tw/'
690
691
class TestClientCertificate:
    """Client-certificate tests against an HTTPS server that requires a client certificate."""

    @classmethod
    def setup_class(cls):
        cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
        server_cert = os.path.join(TEST_DIR, 'testcert.pem')
        ca_cert = os.path.join(cls.certdir, 'ca.crt')

        cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)

        # The server insists on a client certificate verifiable against ca.crt
        tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        tls_context.verify_mode = ssl.CERT_REQUIRED
        tls_context.load_verify_locations(cafile=ca_cert)
        tls_context.load_cert_chain(server_cert, None)
        cls.httpd.socket = tls_context.wrap_socket(cls.httpd.socket, server_side=True)

        cls.port = http_server_port(cls.httpd)
        cls.server_thread = threading.Thread(target=cls.httpd.serve_forever)
        cls.server_thread.daemon = True
        cls.server_thread.start()

    def _run_test(self, handler, **handler_kwargs):
        """Fetch /video.html using a handler built with ``handler_kwargs``; any failure raises."""
        # Disable client-side validation of unacceptable self-signed testcert.pem
        # The test is of a check on the server side, so unaffected
        with handler(verify=False, **handler_kwargs) as rh:
            response = validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html'))
            response.read().decode()

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_combined_nopass(self, handler):
        # Certificate and unencrypted key in a single file
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_nocombined_nopass(self, handler):
        # Certificate and unencrypted key in separate files
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'client.key'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_combined_pass(self, handler):
        # Certificate and password-protected key in a single file
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_nocombined_pass(self, handler):
        # Certificate and password-protected key in separate files
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)
746
747
class TestUrllibRequestHandler(TestRequestHandlerBase):
    """Tests that only apply to the urllib-based request handler."""

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_file_urls(self, handler):
        # See https://github.com/ytdl-org/youtube-dl/issues/8227
        # delete=False keeps the file on disk after it is closed so that it can be
        # requested by URL; it is removed explicitly in the ``finally`` below.
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tf.write(b'foobar')

        try:
            req = Request(pathlib.Path(tf.name).as_uri())
            with handler() as rh:
                # file:// URLs are rejected unless explicitly enabled
                with pytest.raises(UnsupportedRequest):
                    rh.validate(req)

                # Test that urllib never loaded FileHandler
                with pytest.raises(TransportError):
                    rh.send(req)

            with handler(enable_file_urls=True) as rh:
                res = validate_and_send(rh, req)
                assert res.read() == b'foobar'
                res.close()
        finally:
            # Remove the temporary file even when an assertion above fails
            os.unlink(tf.name)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_error_returns_content(self, handler):
        # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
        def get_response():
            # Only the response is returned; the HTTPError goes out of scope here
            with handler() as rh:
                # headers url
                try:
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
                except HTTPError as e:
                    return e.response

        assert get_response().read() == b'<html></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert_error_text(self, handler):
        # Check the output of the error message
        with handler() as rh:
            with pytest.raises(
                CertificateVerifyError,
                match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
            ):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('req,match,version_check', [
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
        # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
        (
            Request('http://127.0.0.1', method='GET\n'),
            'method can\'t contain control characters',
            lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
        # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
        (
            Request('http://127.0.0. 1', method='GET'),
            'URL can\'t contain control characters',
            lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
        (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
    ])
    def test_httplib_validation_errors(self, handler, req, match, version_check):
        # ``version_check`` returns True for interpreter versions lacking the validation under test
        if version_check and version_check(sys.version_info):
            pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')

        with handler() as rh:
            with pytest.raises(RequestError, match=match) as exc_info:
                validate_and_send(rh, req)
            # Must surface as a plain RequestError rather than a TransportError
            assert not isinstance(exc_info.value, TransportError)
821
822
class TestRequestsRequestHandler(TestRequestHandlerBase):
    """Tests of how the Requests handler translates library errors into yt-dlp errors.

    Exceptions in the parametrize tables are wrapped in lambdas so that they are
    only constructed while a test runs, not when this module is imported.
    """

    @pytest.mark.parametrize('raised,expected', [
        (lambda: requests.exceptions.ConnectTimeout(), TransportError),
        (lambda: requests.exceptions.ReadTimeout(), TransportError),
        (lambda: requests.exceptions.Timeout(), TransportError),
        (lambda: requests.exceptions.ConnectionError(), TransportError),
        (lambda: requests.exceptions.ProxyError(), ProxyError),
        (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
        (lambda: requests.exceptions.SSLError(), SSLError),
        (lambda: requests.exceptions.InvalidURL(), RequestError),
        (lambda: requests.exceptions.InvalidHeader(), RequestError),
        # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
        (lambda: urllib3.exceptions.HTTPError(), TransportError),
        (lambda: requests.exceptions.RequestException(), RequestError)
        # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
    ])
    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
    def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
        """An exception raised while sending must come out as exactly ``expected``."""

        class FailingSession:
            # Stand-in session whose request() always raises the exception under test
            def request(self, *args, **kwargs):
                raise raised()

        def fake_get_instance(*args, **kwargs):
            return FailingSession()

        with handler() as rh:
            monkeypatch.setattr(rh, '_get_instance', fake_get_instance)

            with pytest.raises(expected) as exc_info:
                rh.send(Request('http://fake'))

            # The exact class is required; a subclass of ``expected`` is not enough
            assert exc_info.type is expected

    @pytest.mark.parametrize('raised,expected,match', [
        (lambda: urllib3.exceptions.SSLError(), SSLError, None),
        (lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
        (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
        (lambda: urllib3.exceptions.ProtocolError(), TransportError, None),
        (lambda: urllib3.exceptions.DecodeError(), TransportError, None),
        (lambda: urllib3.exceptions.HTTPError(), TransportError, None),  # catch-all
        (
            lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)),
            IncompleteRead,
            '3 bytes read, 4 more expected',
        ),
        (
            lambda: urllib3.exceptions.IncompleteRead(partial=3, expected=5),
            IncompleteRead,
            '3 bytes read, 5 more expected',
        ),
    ])
    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
    def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
        """An exception raised while reading a response must come out as exactly ``expected``."""
        from urllib3.response import HTTPResponse as Urllib3Response
        from requests.models import Response as RequestsResponse
        from yt_dlp.networking._requests import RequestsResponseAdapter

        # Wrap an empty 200 response the same way the handler's adapter would see it
        wrapped = RequestsResponse()
        wrapped.raw = Urllib3Response(body=b'', status=200)
        adapter = RequestsResponseAdapter(wrapped)

        def failing_read(*args, **kwargs):
            raise raised()

        monkeypatch.setattr(adapter.fp, 'read', failing_read)

        with pytest.raises(expected, match=match) as exc_info:
            adapter.read()

        # The exact class is required; a subclass of ``expected`` is not enough
        assert exc_info.type is expected
890
891
def run_validation(handler, error, req, **handler_kwargs):
    """Validate ``req`` with a handler built from ``handler(**handler_kwargs)``.

    ``handler`` is called with the keyword arguments and used as a context
    manager. When ``error`` is falsy, ``validate()`` is simply called and any
    exception propagates; otherwise ``validate()`` is required to raise ``error``.
    """
    with handler(**handler_kwargs) as rh:
        if not error:
            rh.validate(req)
            return
        with pytest.raises(error):
            rh.validate(req)
899
900
class TestRequestHandlerValidation:
    """Table-driven tests of request validation for the real and the stub handlers.

    In every table an expectation of ``False`` means validation must succeed;
    any other value is the exception type validation must raise.
    """

    class ValidationRH(RequestHandler):
        """Stub handler that keeps the base class validation settings."""

        def _send(self, request):
            raise RequestError('test')

    class NoCheckRH(ValidationRH):
        """Stub handler with every ``_SUPPORTED_*`` setting set to None."""
        _SUPPORTED_FEATURES = None
        _SUPPORTED_PROXY_SCHEMES = None
        _SUPPORTED_URL_SCHEMES = None

        def _check_extensions(self, extensions):
            # Discard every extension instead of inspecting it
            extensions.clear()

    class HTTPSupportedRH(ValidationRH):
        """Stub handler that declares only the ``http`` URL scheme."""
        _SUPPORTED_URL_SCHEMES = ('http',)

    URL_SCHEME_TESTS = [
        # scheme, expected to fail, handler kwargs
        ('Urllib', [
            ('http', False, {}),
            ('https', False, {}),
            ('data', False, {}),
            ('ftp', False, {}),
            ('file', UnsupportedRequest, {}),
            ('file', False, {'enable_file_urls': True}),
        ]),
        ('Requests', [
            ('http', False, {}),
            ('https', False, {}),
        ]),
        (NoCheckRH, [('http', False, {})]),
        (ValidationRH, [('http', UnsupportedRequest, {})]),
    ]

    PROXY_SCHEME_TESTS = [
        # scheme, expected to fail
        ('Urllib', [
            ('http', False),
            ('https', UnsupportedRequest),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
            ('socks', UnsupportedRequest),
        ]),
        ('Requests', [
            ('http', False),
            ('https', False),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
        ]),
        (NoCheckRH, [('http', False)]),
        (HTTPSupportedRH, [('http', UnsupportedRequest)]),
    ]

    PROXY_KEY_TESTS = [
        # key, expected to fail
        ('Urllib', [
            ('all', False),
            ('unrelated', False),
        ]),
        ('Requests', [
            ('all', False),
            ('unrelated', False),
        ]),
        (NoCheckRH, [('all', False)]),
        (HTTPSupportedRH, [('all', UnsupportedRequest)]),
        (HTTPSupportedRH, [('no', UnsupportedRequest)]),
    ]

    EXTENSION_TESTS = [
        # extensions, expected to fail
        ('Urllib', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'cookiejar': CookieJar()}, AssertionError),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        ('Requests', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        (NoCheckRH, [
            ({'cookiejar': 'notacookiejar'}, False),
            ({'somerandom': 'test'}, False),  # but any extension is allowed through
        ]),
    ]

    @staticmethod
    def _check_proxies(handler, fail, proxies):
        """Validate ``proxies`` once set on the request and once set on the handler."""
        run_validation(handler, fail, Request('http://', proxies=dict(proxies)))
        run_validation(handler, fail, Request('http://'), proxies=dict(proxies))

    @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
        (rh_key, scheme, fail, handler_kwargs)
        for rh_key, cases in URL_SCHEME_TESTS
        for scheme, fail, handler_kwargs in cases
    ], indirect=['handler'])
    def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
        """Run each URL scheme case from ``URL_SCHEME_TESTS``."""
        request = Request(f'{scheme}://')
        run_validation(handler, fail, request, **(handler_kwargs or {}))

    @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False)], indirect=['handler'])
    def test_no_proxy(self, handler, fail):
        """A ``no`` proxy entry listing hosts must pass validation."""
        self._check_proxies(handler, fail, {'no': '127.0.0.1,github.com'})

    @pytest.mark.parametrize('handler,proxy_key,fail', [
        (rh_key, proxy_key, fail)
        for rh_key, cases in PROXY_KEY_TESTS
        for proxy_key, fail in cases
    ], indirect=['handler'])
    def test_proxy_key(self, handler, proxy_key, fail):
        """Run each proxy key case from ``PROXY_KEY_TESTS``."""
        self._check_proxies(handler, fail, {proxy_key: 'http://example.com'})

    @pytest.mark.parametrize('handler,scheme,fail', [
        (rh_key, scheme, fail)
        for rh_key, cases in PROXY_SCHEME_TESTS
        for scheme, fail in cases
    ], indirect=['handler'])
    def test_proxy_scheme(self, handler, scheme, fail):
        """Run each proxy URL scheme case from ``PROXY_SCHEME_TESTS``."""
        self._check_proxies(handler, fail, {'http': f'{scheme}://example.com'})

    @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests'], indirect=True)
    def test_empty_proxy(self, handler):
        """A proxy entry whose value is None must pass validation."""
        self._check_proxies(handler, False, {'http': None})

    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_invalid_proxy_url(self, handler, proxy_url):
        """Each of these proxy URLs must be rejected with UnsupportedRequest."""
        request = Request('http://', proxies={'http': proxy_url})
        run_validation(handler, UnsupportedRequest, request)

    @pytest.mark.parametrize('handler,extensions,fail', [
        (rh_key, extensions, fail)
        for rh_key, cases in EXTENSION_TESTS
        for extensions, fail in cases
    ], indirect=['handler'])
    def test_extension(self, handler, extensions, fail):
        """Run each request extension case from ``EXTENSION_TESTS``."""
        request = Request('http://', extensions=extensions)
        run_validation(handler, fail, request)

    def test_invalid_request_type(self):
        """Both ``validate()`` and ``send()`` must reject a non-Request argument."""
        rh = self.ValidationRH(logger=FakeLogger())
        for entry_point in (rh.validate, rh.send):
            with pytest.raises(TypeError, match='Expected an instance of Request'):
                entry_point('not a request')
1052
1053
class FakeResponse(Response):
    """Empty-bodied response that remembers the request it was created for."""

    def __init__(self, request):
        # XXX: we could make request part of standard response interface
        self.request = request
        empty_body = io.BytesIO(b'')
        super().__init__(fp=empty_body, headers={}, url=request.url)
1059
1060
class FakeRH(RequestHandler):
    """Handler stub: validates nothing and answers with a FakeResponse.

    URLs starting with ``ssl://`` raise an SSLError whose message is the
    remainder of the URL.
    """

    def _validate(self, request):
        # Nothing is checked, so validation never fails here
        return None

    def _send(self, request: Request):
        ssl_prefix = 'ssl://'
        url = request.url
        if not url.startswith(ssl_prefix):
            return FakeResponse(request)
        raise SSLError(url[len(ssl_prefix):])
1070
1071
class FakeRHYDL(FakeYDL):
    """FakeYDL whose request director is built from FakeRH alone."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        handler_classes = [FakeRH]
        self._request_director = self.build_request_director(handler_classes)
1076
1077
class TestRequestDirector:
    """Tests of how RequestDirector stores handlers and dispatches requests to them.

    The handler classes defined inside the tests keep their exact names, since
    the director keys its handlers by each class's ``RH_KEY``.
    """

    def test_handler_operations(self):
        """Handlers are stored under ``RH_KEY``; adding the same key again replaces the entry."""
        director = RequestDirector(logger=FakeLogger())
        handler = FakeRH(logger=FakeLogger())
        director.add_handler(handler)
        assert director.handlers.get(FakeRH.RH_KEY) is handler

        # Handler should overwrite
        handler2 = FakeRH(logger=FakeLogger())
        director.add_handler(handler2)
        assert director.handlers.get(FakeRH.RH_KEY) is not handler
        assert director.handlers.get(FakeRH.RH_KEY) is handler2
        assert len(director.handlers) == 1

        class AnotherFakeRH(FakeRH):
            pass

        director.add_handler(AnotherFakeRH(logger=FakeLogger()))
        assert len(director.handlers) == 2
        assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY

        director.handlers.pop(FakeRH.RH_KEY, None)
        assert director.handlers.get(FakeRH.RH_KEY) is None
        assert len(director.handlers) == 1

        # RequestErrors should passthrough
        with pytest.raises(SSLError):
            director.send(Request('ssl://something'))

    def test_send(self):
        """Sending fails with RequestError without handlers and succeeds once one is added."""
        director = RequestDirector(logger=FakeLogger())
        with pytest.raises(RequestError):
            director.send(Request('any://'))

        director.add_handler(FakeRH(logger=FakeLogger()))
        assert isinstance(director.send(Request('http://')), FakeResponse)

    def test_unsupported_handlers(self):
        """A request a handler does not support goes to the next one, or fails if none is left."""
        class SupportedRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        director = RequestDirector(logger=FakeLogger())
        director.add_handler(SupportedRH(logger=FakeLogger()))
        director.add_handler(FakeRH(logger=FakeLogger()))

        # First should take preference
        assert director.send(Request('http://')).read() == b'supported'
        assert director.send(Request('any://')).read() == b''

        director.handlers.pop(FakeRH.RH_KEY)
        with pytest.raises(NoSupportingHandlers):
            director.send(Request('any://'))

    def test_unexpected_error(self):
        """A non-RequestError from a handler is counted as unexpected and is not fatal
        when another handler can still serve the request."""
        director = RequestDirector(logger=FakeLogger())

        class UnexpectedRH(FakeRH):
            def _send(self, request: Request):
                raise TypeError('something')

        # The handler is given a logger instance, like every other handler in this file
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
            director.send(Request('any://'))

        director.handlers.clear()
        assert len(director.handlers) == 0

        # Should not be fatal
        director.add_handler(FakeRH(logger=FakeLogger()))
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        assert director.send(Request('any://'))

    def test_preference(self):
        """A registered preference function decides which supporting handler is used."""
        director = RequestDirector(logger=FakeLogger())
        director.add_handler(FakeRH(logger=FakeLogger()))

        class SomeRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        def some_preference(rh, request):
            # Neutral for other handlers; SomeRH only wins when the 'prefer' header is present
            if not isinstance(rh, SomeRH):
                return 0
            return 100 if 'prefer' in request.headers else -1

        director.add_handler(SomeRH(logger=FakeLogger()))
        director.preferences.add(some_preference)

        assert director.send(Request('http://')).read() == b''
        assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
1172
1173
1174 # XXX: do we want to move this to test_YoutubeDL.py?
class TestYoutubeDLNetworking:
    """Tests of the networking glue on the YoutubeDL object (``urlopen`` and the
    construction of request handlers from YoutubeDL params)."""

    @staticmethod
    def build_handler(ydl, handler: RequestHandler = FakeRH):
        """Build a request director holding only ``handler`` and return that handler instance."""
        return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)

    @staticmethod
    def _restore_env(key, previous):
        """Set environment variable ``key`` back to ``previous``; remove it if it was unset.

        ``previous`` is the result of ``os.environ.get(key)`` taken before the
        test changed the variable, so None means the variable did not exist.
        """
        if previous is None:
            os.environ.pop(key, None)
        else:
            os.environ[key] = previous

    def test_compat_opener(self):
        """The legacy ``_opener`` attribute still yields a urllib OpenerDirector."""
        with FakeYDL() as ydl:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                assert isinstance(ydl._opener, urllib.request.OpenerDirector)

    @pytest.mark.parametrize('proxy,expected', [
        ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
        ('', {'all': '__noproxy__'}),
        (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'})  # env, set https
    ])
    def test_proxy(self, proxy, expected):
        """The ``proxy`` param takes precedence over ``HTTP_PROXY`` from the environment."""
        env_key = 'HTTP_PROXY'
        old_http_proxy = os.environ.get(env_key)
        try:
            os.environ[env_key] = 'http://127.0.0.1:8081'  # ensure that provided proxies override env
            with FakeYDL({'proxy': proxy}) as ydl:
                assert ydl.proxies == expected
        finally:
            # Always undo the change, including when the variable did not exist before,
            # so the test value cannot leak into later tests
            self._restore_env(env_key, old_http_proxy)

    def test_compat_request(self):
        """``urlopen`` accepts a URL string and a urllib Request, and rejects None."""
        with FakeRHYDL() as ydl:
            assert ydl.urlopen('test://')

            urllib_req = urllib.request.Request(
                'http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
            urllib_req.add_unredirected_header('Cookie', 'bob=bob')
            urllib_req.timeout = 2
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                req = ydl.urlopen(urllib_req).request
                assert req.url == urllib_req.get_full_url()
                assert req.data == urllib_req.data
                assert req.method == urllib_req.get_method()
                assert 'X-Test' in req.headers
                assert 'Cookie' in req.headers
                assert req.extensions.get('timeout') == 2

            with pytest.raises(AssertionError):
                ydl.urlopen(None)

    def test_extract_basic_auth(self):
        """Credentials embedded in the URL become a Basic ``Authorization`` header."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('http://user:pass@foo.bar'))
            assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'

    def test_sanitize_url(self):
        """The mistyped scheme ``httpss`` is corrected to ``https``."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('httpss://foo.bar'))
            assert res.request.url == 'https://foo.bar'

    def test_file_urls_error(self):
        """``file://`` URLs are refused with an explanatory message by default."""
        # use urllib handler
        with FakeYDL() as ydl:
            with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
                ydl.urlopen('file://')

    def test_legacy_server_connect_error(self):
        """Certain SSL errors gain a ``--legacy-server-connect`` hint; others pass through."""
        with FakeRHYDL() as ydl:
            for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
                    ydl.urlopen(f'ssl://{error}')

            with pytest.raises(SSLError, match='testerror'):
                ydl.urlopen('ssl://testerror')

    @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
        ('http', '__noproxy__', None),
        ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
        ('https', 'example.com', 'http://example.com'),
        ('https', '//example.com', 'http://example.com'),
        ('https', 'socks5://example.com', 'socks5h://example.com'),
        ('http', 'socks://example.com', 'socks4://example.com'),
        ('http', 'socks4://example.com', 'socks4://example.com'),
        ('unrelated', '/bad/proxy', '/bad/proxy'),  # clean_proxies should ignore bad proxies
    ])
    def test_clean_proxy(self, proxy_key, proxy_url, expected):
        """Proxy values are normalised both per request and when a handler is built."""
        # proxies should be cleaned in urlopen()
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
            assert req.proxies[proxy_key] == expected

        # and should also be cleaned when building the handler
        env_key = f'{proxy_key.upper()}_PROXY'
        old_env_proxy = os.environ.get(env_key)
        try:
            os.environ[env_key] = proxy_url  # the handler's proxies come from the environment here
            with FakeYDL() as ydl:
                rh = self.build_handler(ydl)
                assert rh.proxies[proxy_key] == expected
        finally:
            # Always undo the change, including when the variable did not exist before,
            # so the test value cannot leak into later tests
            self._restore_env(env_key, old_env_proxy)

    def test_clean_proxy_header(self):
        """The ``ytdl-request-proxy`` header is removed and turned into an ``all`` proxy."""
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
            assert 'ytdl-request-proxy' not in req.headers
            assert req.proxies == {'all': 'http://foo.bar'}

        with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'ytdl-request-proxy' not in rh.headers
            assert rh.proxies == {'all': 'http://foo.bar'}

    def test_clean_header(self):
        """``Youtubedl-no-compression`` is replaced by ``Accept-Encoding: identity``."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
            assert 'Youtubedl-no-compression' not in res.request.headers
            assert res.request.headers.get('Accept-Encoding') == 'identity'

        with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'Youtubedl-no-compression' not in rh.headers
            assert rh.headers.get('Accept-Encoding') == 'identity'

    def test_build_handler_params(self):
        """YoutubeDL params are passed on to the matching handler attributes."""
        with FakeYDL({
            'http_headers': {'test': 'testtest'},
            'socket_timeout': 2,
            'proxy': 'http://127.0.0.1:8080',
            'source_address': '127.0.0.45',
            'debug_printtraffic': True,
            'compat_opts': ['no-certifi'],
            'nocheckcertificate': True,
            'legacyserverconnect': True,
        }) as ydl:
            rh = self.build_handler(ydl)
            assert rh.headers.get('test') == 'testtest'
            assert 'Accept' in rh.headers  # ensure std_headers are still there
            assert rh.timeout == 2
            assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
            assert rh.source_address == '127.0.0.45'
            assert rh.verbose is True
            assert rh.prefer_system_certs is True
            assert rh.verify is False
            assert rh.legacy_ssl_support is True

    @pytest.mark.parametrize('ydl_params', [
        {'client_certificate': 'fakecert.crt'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
        {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
    ])
    def test_client_certificate(self, ydl_params):
        """The client certificate params reach the handler unchanged."""
        with FakeYDL(ydl_params) as ydl:
            rh = self.build_handler(ydl)
            assert rh._client_cert == ydl_params  # XXX: Too bound to implementation

    def test_urllib_file_urls(self):
        """``enable_file_urls`` is passed through to the urllib handler."""
        with FakeYDL({'enable_file_urls': False}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is False

        with FakeYDL({'enable_file_urls': True}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is True

    def test_compat_opt_prefer_urllib(self):
        """``prefer-legacy-http-handler`` registers a preference in favour of urllib."""
        # This assumes urllib only has a preference when this compat opt is given
        with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
            director = ydl.build_request_director([UrllibRH])
            assert len(director.preferences) == 1
            assert director.preferences.pop()(UrllibRH, None)
1344
1345
class TestRequest:
    """Tests of the Request object on its own; nothing is sent anywhere."""

    def test_query(self):
        """``query`` values are merged into the URL and replace keys already present."""
        request = Request('http://example.com?q=something', query={'v': 'xyz'})
        assert request.url == 'http://example.com?q=something&v=xyz'

        request.update(query={'v': '123'})
        assert request.url == 'http://example.com?q=something&v=123'

        request.update(url='http://example.com', query={'v': 'xyz'})
        assert request.url == 'http://example.com?v=xyz'

    def test_method(self):
        """The method follows the data until it is set explicitly, and must be a string."""
        request = Request('http://example.com')
        assert request.method == 'GET'

        # With no explicit method, it tracks whether data is present
        for payload, implied_method in ((b'test', 'POST'), (None, 'GET')):
            request.data = payload
            assert request.method == implied_method

        # Once set explicitly, the method no longer depends on the data
        request.data = b'test2'
        request.method = 'PUT'
        assert request.method == 'PUT'
        request.data = None
        assert request.method == 'PUT'

        with pytest.raises(TypeError):
            request.method = 1

    def test_request_helpers(self):
        """The convenience request classes preset their HTTP method."""
        for request_class, method in ((HEADRequest, 'HEAD'), (PUTRequest, 'PUT')):
            assert request_class('http://example.com').method == method

    def test_headers(self):
        """Headers are case-insensitive, merged by ``update`` and always an HTTPHeaderDict."""
        request = Request('http://example.com', headers={'tesT': 'test'})
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        request.update(headers={'teSt2': 'test2'})
        assert request.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})

        # An HTTPHeaderDict is stored as the very same object
        header_dict = HTTPHeaderDict({'test': 'test'})
        request.headers = header_dict
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        assert request.headers is header_dict

        # test converts dict to case insensitive dict
        plain_dict = {'test2': 'test2'}
        request.headers = plain_dict
        assert isinstance(request.headers, HTTPHeaderDict)
        assert request.headers is not plain_dict

        with pytest.raises(TypeError):
            request.headers = None

    def test_data_type(self):
        """Bytes, iterables of bytes and file-like objects are accepted as data; str and dict are not."""
        request = Request('http://example.com')
        assert request.data is None

        # bytes, an iterable of bytes and a file-like object are all allowed
        for payload in (b'test', [b'test', b'test2'], io.BytesIO(b'test')):
            request.data = payload
            assert request.data == payload

        # common mistakes: str and dict are rejected and not stored
        for invalid in ('test', {'test': 'test'}):
            with pytest.raises(TypeError):
                request.data = invalid
            assert request.data != invalid

    def test_content_length_header(self):
        """A Content-Length header is dropped when data changes or when there is no data."""
        with_data = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
        assert with_data.headers.get('Content-Length') == '0'

        with_data.data = b'test'
        assert 'Content-Length' not in with_data.headers

        without_data = Request('http://example.com', headers={'Content-Length': '10'})
        assert 'Content-Length' not in without_data.headers

    def test_content_type_header(self):
        """Content-Type survives data changes, goes away with the data and is then defaulted."""
        request = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
        assert request.headers.get('Content-Type') == 'test'

        request.data = b'test2'
        assert request.headers.get('Content-Type') == 'test'

        request.data = None
        assert 'Content-Type' not in request.headers

        request.data = b'test3'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_update_req(self):
        """``update(data=b'')`` counts as a payload: POST with the default Content-Type."""
        request = Request('http://example.com')
        assert request.data is None
        assert request.method == 'GET'
        assert 'Content-Type' not in request.headers

        # Test that zero-byte payloads will be sent
        request.update(data=b'')
        assert request.data == b''
        assert request.method == 'POST'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_proxies(self):
        """Proxies given to the constructor are readable from ``proxies``."""
        proxied = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'})
        assert proxied.proxies == {'http': 'http://127.0.0.1:8080'}

    def test_extensions(self):
        """Extensions given to the constructor are readable from ``extensions``."""
        extended = Request(url='http://example.com', extensions={'timeout': 2})
        assert extended.extensions == {'timeout': 2}

    def test_copy(self):
        """``copy()`` duplicates headers, proxies and the extensions mapping but shares the data."""
        original = Request(
            url='http://example.com',
            extensions={'cookiejar': CookieJar()},
            headers={'Accept-Encoding': 'br'},
            proxies={'http': 'http://127.0.0.1'},
            data=[b'123'],
        )
        duplicate = original.copy()
        assert duplicate is not original
        assert duplicate.url == original.url
        assert duplicate.headers == original.headers
        assert duplicate.headers is not original.headers
        assert duplicate.proxies == original.proxies
        assert duplicate.proxies is not original.proxies

        # Data is not able to be copied
        assert duplicate.data == original.data
        assert duplicate.data is original.data

        # Shallow copy extensions
        assert duplicate.extensions is not original.extensions
        assert duplicate.extensions['cookiejar'] == original.extensions['cookiejar']

        # Subclasses are copied by default
        class AnotherRequest(Request):
            pass

        subclassed = AnotherRequest(url='http://127.0.0.1')
        assert isinstance(subclassed.copy(), AnotherRequest)

    def test_url(self):
        """URLs are normalised on assignment and must not be None."""
        unicode_url = Request(url='https://фtest.example.com/ some spaceв?ä=c',)
        assert unicode_url.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'

        assert Request(url='//example.com').url == 'http://example.com'

        with pytest.raises(TypeError):
            Request(url='https://').url = None
1498
1499
class TestResponse:
    """Tests of the Response object built directly over an in-memory body."""

    @pytest.mark.parametrize('reason,status,expected', [
        ('custom', 200, 'custom'),
        (None, 404, 'Not Found'),  # fallback status
        ('', 403, 'Forbidden'),
        (None, 999, None),
    ])
    def test_reason(self, reason, status, expected):
        """A missing or empty reason is derived from the status when the status is known."""
        response = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
        assert response.reason == expected

    def test_headers(self):
        """Repeated headers keep every value and lookups ignore case."""
        message = Message()
        for name, value in (('Test', 'test'), ('Test', 'test2'), ('content-encoding', 'br')):
            message.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=message, url='test://')
        assert response.headers.get_all('test') == ['test', 'test2']
        assert 'Content-Encoding' in response.headers

    def test_get_header(self):
        """``get_header`` joins repeated values, except Set-Cookie where only the first is returned."""
        message = Message()
        for name, value in (
            ('Set-Cookie', 'cookie1'),
            ('Set-cookie', 'cookie2'),
            ('Test', 'test'),
            ('Test', 'test2'),
        ):
            message.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=message, url='test://')
        assert response.get_header('test') == 'test, test2'
        assert response.get_header('set-Cookie') == 'cookie1'
        assert response.get_header('notexist', 'default') == 'default'

    def test_compat(self):
        """The deprecated urllib-style accessors agree with the current attributes."""
        response = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=DeprecationWarning)
            assert response.code == response.getcode() == response.status
            assert response.geturl() == response.url
            assert response.info() is response.headers
            assert response.getheader('test') == response.get_header('test')