]> jfr.im git - yt-dlp.git/blame - test/test_networking.py
[networking] Add strict Request extension checking (#7604)
[yt-dlp.git] / test / test_networking.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
54007a45 2
83fda3c0
PH
3# Allow direct execution
4import os
5import sys
227bf1a3 6
7import pytest
f8271158 8
83fda3c0
PH
9sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
227bf1a3 11import functools
08916a49 12import gzip
227bf1a3 13import http.client
08916a49 14import http.cookiejar
54007a45 15import http.server
227bf1a3 16import inspect
08916a49 17import io
18import pathlib
227bf1a3 19import random
f8271158 20import ssl
08916a49 21import tempfile
f8271158 22import threading
227bf1a3 23import time
08916a49 24import urllib.error
ac668111 25import urllib.request
227bf1a3 26import warnings
daafbf49 27import zlib
227bf1a3 28from email.message import Message
29from http.cookiejar import CookieJar
f8271158 30
227bf1a3 31from test.helper import FakeYDL, http_server_port
daafbf49 32from yt_dlp.dependencies import brotli
227bf1a3 33from yt_dlp.networking import (
34 HEADRequest,
35 PUTRequest,
36 Request,
37 RequestDirector,
38 RequestHandler,
39 Response,
40)
41from yt_dlp.networking._urllib import UrllibRH
42from yt_dlp.networking.common import _REQUEST_HANDLERS
43from yt_dlp.networking.exceptions import (
44 CertificateVerifyError,
45 HTTPError,
46 IncompleteRead,
47 NoSupportingHandlers,
48 RequestError,
49 SSLError,
50 TransportError,
51 UnsupportedRequest,
52)
53from yt_dlp.utils._utils import _YDLLogger as FakeLogger
54from yt_dlp.utils.networking import HTTPHeaderDict
83fda3c0
PH
55
56TEST_DIR = os.path.dirname(os.path.abspath(__file__))
57
03d8d4df 58
227bf1a3 59def _build_proxy_handler(name):
60 class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
61 proxy_name = name
62
63 def log_message(self, format, *args):
64 pass
65
66 def do_GET(self):
67 self.send_response(200)
68 self.send_header('Content-Type', 'text/plain; charset=utf-8')
69 self.end_headers()
70 self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode())
71 return HTTPTestRequestHandler
72
73
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    """Request handler for the local HTTP(S) test servers.

    Each route exercises one client-side behaviour: redirects, echoing the
    request method/headers, content encodings, truncated bodies, slow
    responses and so on. See `do_GET` for the full list of GET routes.
    """
    protocol_version = 'HTTP/1.1'

    # Small HTML document served by several routes
    _VIDEO_PAGE = b'<html><video src="/vid.mp4" /></html>'
    _HTML = 'text/html; charset=utf-8'

    def log_message(self, format, *args):
        # Suppress the default per-request logging
        pass

    def _send_body(self, status, content_type, payload):
        """Send a full response: status line, Content-Type, Content-Length, then `payload`."""
        self.send_response(status)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _send_empty_redirect(self, status, location):
        """Send a body-less response with the given status and a Location header."""
        self.send_response(status)
        self.send_header('Location', location)
        self.send_header('Content-Length', '0')
        self.end_headers()

    def _headers(self):
        """Echo the request headers back as the response body."""
        self._send_body(200, 'application/json', str(self.headers).encode())

    def _redirect(self):
        """Redirect to /method using the status code embedded in the path (/redirect_<code>)."""
        self._send_empty_redirect(int(self.path[len('/redirect_'):]), '/method')

    def _method(self, method, payload=None):
        """Report the request method in a 'Method' response header, with an optional body."""
        self.send_response(200)
        self.send_header('Content-Length', str(len(payload or '')))
        self.send_header('Method', method)
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def _status(self, status):
        """Send a small HTML page with the given status code."""
        payload = f'<html>{status} NOT FOUND</html>'.encode()
        self._send_body(int(status), self._HTML, payload)

    def _read_data(self):
        """Read the request body as announced by Content-Length.

        Returns empty bytes when the request has no Content-Length header, so
        callers can always concatenate the result with other bytes.
        """
        if 'Content-Length' in self.headers:
            return self.rfile.read(int(self.headers['Content-Length']))
        return b''

    def do_POST(self):
        # The request body followed by the request headers is echoed on /method
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('POST', data)
        elif self.path.startswith('/headers'):
            self._headers()
        else:
            self._status(404)

    def do_HEAD(self):
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('HEAD')
        else:
            self._status(404)

    def do_PUT(self):
        # The request body followed by the request headers is echoed on /method
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('PUT', data)
        else:
            self._status(404)

    def do_GET(self):
        """Dispatch GET requests to the test routes; unknown paths get a 404."""
        if self.path in ('/video.html', '/%E4%B8%AD%E6%96%87.html', '/%c7%9f'):
            self._send_body(200, self._HTML, self._VIDEO_PAGE)
        elif self.path == '/vid.mp4':
            self._send_body(200, 'video/mp4', b'\x00\x00\x00\x00\x20\x66\x74[video]')
        elif self.path.startswith('/redirect_loop'):
            # Must be checked before the generic '/redirect_' prefix below
            self._send_empty_redirect(301, self.path)
        elif self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('GET', str(self.headers).encode())
        elif self.path.startswith('/headers'):
            self._headers()
        elif self.path.startswith('/308-to-headers'):
            self._send_empty_redirect(308, '/headers')
        elif self.path == '/trailing_garbage':
            # Valid gzip stream followed by junk bytes
            self.send_response(200)
            self.send_header('Content-Type', self._HTML)
            self.send_header('Content-Encoding', 'gzip')
            buf = io.BytesIO()
            with gzip.GzipFile(fileobj=buf, mode='wb') as f:
                f.write(self._VIDEO_PAGE)
            compressed = buf.getvalue() + b'trailing garbage'
            self.send_header('Content-Length', str(len(compressed)))
            self.end_headers()
            self.wfile.write(compressed)
        elif self.path == '/302-non-ascii-redirect':
            # The Location header deliberately contains raw non-ASCII characters
            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
            self._send_empty_redirect(301, new_url)
        elif self.path == '/content-encoding':
            # The 'ytdl-encoding' request header lists the encodings to apply, in order
            encodings = self.headers.get('ytdl-encoding', '')
            payload = self._VIDEO_PAGE
            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
                if encoding == 'br' and brotli:
                    payload = brotli.compress(payload)
                elif encoding == 'gzip':
                    buf = io.BytesIO()
                    with gzip.GzipFile(fileobj=buf, mode='wb') as f:
                        f.write(payload)
                    payload = buf.getvalue()
                elif encoding == 'deflate':
                    payload = zlib.compress(payload)
                elif encoding == 'unsupported':
                    payload = b'raw'
                    break
                else:
                    self._status(415)
                    return
            self.send_response(200)
            self.send_header('Content-Encoding', encodings)
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)
        elif self.path.startswith('/gen_'):
            # Respond with the status code embedded in the path (/gen_<code>)
            self._send_body(int(self.path[len('/gen_'):]), self._HTML, b'<html></html>')
        elif self.path.startswith('/incompleteread'):
            # Announce far more data than is actually sent, then finish the response
            payload = b'<html></html>'
            self.send_response(200)
            self.send_header('Content-Type', self._HTML)
            self.send_header('Content-Length', '234234')
            self.end_headers()
            self.wfile.write(payload)
            self.finish()
        elif self.path.startswith('/timeout_'):
            # Sleep for the number of seconds embedded in the path before answering
            time.sleep(int(self.path[len('/timeout_'):]))
            self._headers()
        elif self.path == '/source_address':
            # Report the client address the request came from
            self._send_body(200, self._HTML, str(self.client_address[0]).encode())
            self.finish()
        else:
            self._status(404)

    def send_header(self, keyword, value):
        """
        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
        This is against what is defined in RFC 3986, however we need to test we support this
        since some sites incorrectly do this.
        """
        if keyword.lower() == 'connection':
            return super().send_header(keyword, value)

        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []

        self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
83fda3c0
PH
271
272
def validate_and_send(rh, req):
    """Validate `req` with the request handler `rh`, then send it and return the response."""
    rh.validate(req)
    response = rh.send(req)
    return response
83fda3c0 276
83fda3c0 277
class TestRequestHandlerBase:
    """Base class that runs a local HTTP and HTTPS test server for the test class.

    `setup_class` stores the servers, their ports and their threads on the
    class; `teardown_class` shuts the servers down again.
    """

    @classmethod
    def setup_class(cls):
        # Plain HTTP server on an OS-assigned port
        cls.http_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        cls.http_port = http_server_port(cls.http_httpd)
        cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever)
        # FIXME: we should probably stop the http server thread after each test
        # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
        cls.http_server_thread.daemon = True
        cls.http_server_thread.start()

        # HTTPS server
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        cls.https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
        cls.https_port = http_server_port(cls.https_httpd)
        cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever)
        cls.https_server_thread.daemon = True
        cls.https_server_thread.start()

    @classmethod
    def teardown_class(cls):
        # Stop the serve_forever loops and release the listening sockets so that
        # servers do not pile up across test classes
        for attr in ('http_httpd', 'https_httpd'):
            httpd = getattr(cls, attr, None)
            if httpd is not None:
                httpd.shutdown()
                httpd.server_close()
301
302
@pytest.fixture
def handler(request):
    """Resolve the parametrized request handler into a factory bound to the fake logger.

    The parameter is either a `RequestHandler` subclass or a key of
    `_REQUEST_HANDLERS`; the test is skipped when the key is not registered.
    """
    rh_key = request.param
    if inspect.isclass(rh_key) and issubclass(rh_key, RequestHandler):
        return functools.partial(rh_key, logger=FakeLogger)

    if rh_key not in _REQUEST_HANDLERS:
        pytest.skip(f'{rh_key} request handler is not available')

    return functools.partial(_REQUEST_HANDLERS[rh_key], logger=FakeLogger)
314
315
class TestHTTPRequestHandler(TestRequestHandlerBase):
    """HTTP behaviour tests run against the local test servers of the base class."""

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert(self, handler):
        with handler() as rh:
            with pytest.raises(CertificateVerifyError):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

        with handler(verify=False) as rh:
            r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
            assert r.status == 200
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_ssl_error(self, handler):
        # HTTPS server with too old TLS version
        # XXX: is there a better way to test this than to create a new server?
        https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
        https_port = http_server_port(https_httpd)
        https_server_thread = threading.Thread(target=https_httpd.serve_forever)
        https_server_thread.daemon = True
        https_server_thread.start()

        try:
            with handler(verify=False) as rh:
                with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
                    validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
                assert not issubclass(exc_info.type, CertificateVerifyError)
        finally:
            # This server is private to the test, so stop it and free its socket
            https_httpd.shutdown()
            https_httpd.server_close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_percent_encode(self, handler):
        with handler() as rh:
            # Unicode characters should be encoded with uppercase percent-encoding
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
            assert res.status == 200
            res.close()
            # don't normalize existing percent encodings
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
            assert res.status == 200
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_unicode_path_redirection(self, handler):
        with handler() as rh:
            r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
            assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_raise_http_error(self, handler):
        with handler() as rh:
            for bad_status in (400, 500, 599, 302):
                with pytest.raises(HTTPError):
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_{bad_status}'))

            # Should not raise an error
            validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')).close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_response_url(self, handler):
        with handler() as rh:
            # Response url should be that of the last url in redirect chain
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
            assert res.url == f'http://127.0.0.1:{self.http_port}/method'
            res.close()
            res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
            assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
            res2.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_redirect(self, handler):
        with handler() as rh:
            def do_req(redirect_status, method, assert_no_content=False):
                """Follow /redirect_<status>; return (echoed request body, final method)."""
                data = b'testdata' if method in ('POST', 'PUT') else None
                res = validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))

                # The /method route echoes the request body followed by the request headers.
                # If the leading bytes are not our data, the body was dropped on redirect
                # and those bytes are already part of the headers.
                headers = b''
                data_sent = b''
                if data is not None:
                    data_sent += res.read(len(data))
                    if data_sent != data:
                        headers += data_sent
                        data_sent = b''

                headers += res.read()

                if assert_no_content or data is None:
                    assert b'Content-Type' not in headers
                    assert b'Content-Length' not in headers
                else:
                    assert b'Content-Type' in headers
                    assert b'Content-Length' in headers

                return data_sent.decode(), res.headers.get('method', '')

            # A 303 must either use GET or HEAD for subsequent request
            assert do_req(303, 'POST', True) == ('', 'GET')
            assert do_req(303, 'HEAD') == ('', 'HEAD')

            assert do_req(303, 'PUT', True) == ('', 'GET')

            # 301 and 302 turn POST only into a GET
            assert do_req(301, 'POST', True) == ('', 'GET')
            assert do_req(301, 'HEAD') == ('', 'HEAD')
            assert do_req(302, 'POST', True) == ('', 'GET')
            assert do_req(302, 'HEAD') == ('', 'HEAD')

            assert do_req(301, 'PUT') == ('testdata', 'PUT')
            assert do_req(302, 'PUT') == ('testdata', 'PUT')

            # 307 and 308 should not change method
            for m in ('POST', 'PUT'):
                assert do_req(307, m) == ('testdata', m)
                assert do_req(308, m) == ('testdata', m)

            assert do_req(307, 'HEAD') == ('', 'HEAD')
            assert do_req(308, 'HEAD') == ('', 'HEAD')

            # These should not redirect and instead raise an HTTPError
            for code in (300, 304, 305, 306):
                with pytest.raises(HTTPError):
                    do_req(code, 'GET')

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_request_cookie_header(self, handler):
        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
        with handler() as rh:
            # Specified Cookie header should be used
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' in res

            # Specified Cookie header should be removed on any redirect
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/308-to-headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' not in res

        # Specified Cookie header should override global cookiejar for that request
        cookiejar = http.cookiejar.CookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp', port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
            path_specified=True, secure=False, expires=None, discard=False, comment=None,
            comment_url=None, rest={}))

        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read()
            assert b'Cookie: test=ytdlp' not in data
            assert b'Cookie: test=test' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_redirect_loop(self, handler):
        with handler() as rh:
            with pytest.raises(HTTPError, match='redirect loop'):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_incompleteread(self, handler):
        with handler(timeout=2) as rh:
            with pytest.raises(IncompleteRead):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_cookies(self, handler):
        cookiejar = http.cookiejar.CookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
            False, '/headers', True, False, None, False, None, None, {}))

        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Cookie: test=ytdlp' in data

        # Per request
        with handler() as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
            assert b'Cookie: test=ytdlp' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_headers(self, handler):

        with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
            # Global Headers
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Test1: test' in data

            # Per request headers, merged with global
            data = validate_and_send(rh, Request(
                f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read()
            assert b'Test1: test' in data
            assert b'Test2: changed' in data
            assert b'Test2: test2' not in data
            assert b'Test3: test3' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_timeout(self, handler):
        with handler() as rh:
            # Default timeout is 20 seconds, so this should go through
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3'))

        with handler(timeout=0.5) as rh:
            with pytest.raises(TransportError):
                validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))

            # Per request timeout, should override handler timeout
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_source_address(self, handler):
        source_address = f'127.0.0.{random.randint(5, 255)}'
        with handler(source_address=source_address) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
            assert source_address == data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_gzip_trailing_garbage(self, handler):
        with handler() as rh:
            data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
            assert data == '<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
    def test_brotli(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'br'}))
            assert res.headers.get('Content-Encoding') == 'br'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_deflate(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'deflate'}))
            assert res.headers.get('Content-Encoding') == 'deflate'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_gzip(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'gzip'}))
            assert res.headers.get('Content-Encoding') == 'gzip'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_multiple_encodings(self, handler):
        with handler() as rh:
            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
                res = validate_and_send(
                    rh, Request(
                        f'http://127.0.0.1:{self.http_port}/content-encoding',
                        headers={'ytdl-encoding': pair}))
                assert res.headers.get('Content-Encoding') == pair
                assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_unsupported_encoding(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'unsupported'}))
            assert res.headers.get('Content-Encoding') == 'unsupported'
            assert res.read() == b'raw'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_read(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
            assert res.readable()
            assert res.read(1) == b'H'
            assert res.read(3) == b'ost'
609
class TestHTTPProxy(TestRequestHandlerBase):
    """Proxy selection tests using two fake proxies labelled 'normal' and 'geo'."""

    @classmethod
    def setup_class(cls):
        super().setup_class()

        # HTTP Proxy server
        cls.proxy = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('normal'))
        cls.proxy_port = http_server_port(cls.proxy)
        cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever, daemon=True)
        cls.proxy_thread.start()

        # Geo proxy server
        cls.geo_proxy = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('geo'))
        cls.geo_port = http_server_port(cls.geo_proxy)
        cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever, daemon=True)
        cls.geo_proxy_thread.start()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_proxy(self, handler):
        # Covers: the global http proxy, a per-request proxy overriding it,
        # and a per-request None disabling the proxy altogether
        normal_proxy_url = f'http://127.0.0.1:{self.proxy_port}'
        geo_proxy_url = f'http://127.0.0.1:{self.geo_port}'
        url = 'http://foo.com/bar'

        with handler(proxies={'http': normal_proxy_url}) as rh:
            # Global HTTP proxy
            body = validate_and_send(rh, Request(url)).read().decode()
            assert body == f'normal: {url}'

            # Per request proxy overrides global
            body = validate_and_send(rh, Request(url, proxies={'http': geo_proxy_url})).read().decode()
            assert body == f'geo: {url}'

            # and setting to None disables all proxies for that request
            real_url = f'http://127.0.0.1:{self.http_port}/headers'
            direct_request = Request(real_url, proxies={'http': None})
            body = validate_and_send(rh, direct_request).read().decode()
            assert body != f'normal: {real_url}'
            assert 'Accept' in body

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_noproxy(self, handler):
        target = f'http://127.0.0.1:{self.http_port}/headers'
        with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
            # NO_PROXY
            for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
                request = Request(target, proxies={'no': no_proxy})
                nop_response = validate_and_send(rh, request).read().decode('utf-8')
                assert 'Accept' in nop_response

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_allproxy(self, handler):
        url = 'http://foo.com/bar'
        with handler() as rh:
            request = Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})
            response = validate_and_send(rh, request).read().decode('utf-8')
            assert response == f'normal: {url}'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_proxy_with_idn(self, handler):
        proxies = {
            'http': f'http://127.0.0.1:{self.proxy_port}',
        }
        with handler(proxies=proxies) as rh:
            url = 'http://中文.tw/'
            response = rh.send(Request(url)).read().decode()
            # b'xn--fiq228c' is '中文'.encode('idna')
            assert response == 'normal: http://xn--fiq228c.tw/'
683
684
class TestClientCertificate:
    """Client certificate tests against an HTTPS server that requires one."""

    @classmethod
    def setup_class(cls):
        cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
        server_cert = os.path.join(TEST_DIR, 'testcert.pem')
        ca_cert = os.path.join(cls.certdir, 'ca.crt')

        cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)

        # The server demands a client certificate verifiable against ca.crt
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.verify_mode = ssl.CERT_REQUIRED
        sslctx.load_verify_locations(cafile=ca_cert)
        sslctx.load_cert_chain(server_cert, None)
        cls.httpd.socket = sslctx.wrap_socket(cls.httpd.socket, server_side=True)

        cls.port = http_server_port(cls.httpd)
        cls.server_thread = threading.Thread(target=cls.httpd.serve_forever, daemon=True)
        cls.server_thread.start()

    def _run_test(self, handler, **handler_kwargs):
        """Fetch /video.html with a handler built from `handler_kwargs`."""
        # Disable client-side validation of unacceptable self-signed testcert.pem
        # The test is of a check on the server side, so unaffected
        with handler(verify=False, **handler_kwargs) as rh:
            response = validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html'))
            response.read().decode()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_combined_nopass(self, handler):
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_nocombined_nopass(self, handler):
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'client.key'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_combined_pass(self, handler):
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_nocombined_pass(self, handler):
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)
bb58c9ed 739
bb58c9ed 740
class TestUrllibRequestHandler(TestRequestHandlerBase):
    """Tests for behaviour specific to the urllib request handler."""

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_file_urls(self, handler):
        # See https://github.com/ytdl-org/youtube-dl/issues/8227
        tf = tempfile.NamedTemporaryFile(delete=False)
        try:
            tf.write(b'foobar')
            tf.close()
            req = Request(pathlib.Path(tf.name).as_uri())
            with handler() as rh:
                with pytest.raises(UnsupportedRequest):
                    rh.validate(req)

                # Test that urllib never loaded FileHandler
                with pytest.raises(TransportError):
                    rh.send(req)

            with handler(enable_file_urls=True) as rh:
                res = validate_and_send(rh, req)
                assert res.read() == b'foobar'
                res.close()
        finally:
            # Remove the temp file even when an assertion above fails
            tf.close()
            os.unlink(tf.name)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_error_returns_content(self, handler):
        # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
        def get_response():
            with handler() as rh:
                # headers url
                try:
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
                except HTTPError as e:
                    return e.response

        assert get_response().read() == b'<html></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert_error_text(self, handler):
        # Check the output of the error message
        with handler() as rh:
            with pytest.raises(
                CertificateVerifyError,
                match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
            ):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_httplib_validation_errors(self, handler):
        # Each case must surface as a RequestError that is not a TransportError
        with handler() as rh:

            # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
            with pytest.raises(RequestError, match='method can\'t contain control characters') as exc_info:
                validate_and_send(rh, Request('http://127.0.0.1', method='GET\n'))
            assert not isinstance(exc_info.value, TransportError)

            # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
            with pytest.raises(RequestError, match='URL can\'t contain control characters') as exc_info:
                validate_and_send(rh, Request('http://127.0.0. 1', method='GET\n'))
            assert not isinstance(exc_info.value, TransportError)

            # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
            with pytest.raises(RequestError, match='Invalid header name') as exc_info:
                validate_and_send(rh, Request('http://127.0.0.1', headers={'foo\n': 'bar'}))
            assert not isinstance(exc_info.value, TransportError)
805
806
def run_validation(handler, error, req, **handler_kwargs):
    """Validate `req` with a handler built from `handler(**handler_kwargs)`.

    If `error` is truthy it is the exception type that validation is expected
    to raise; otherwise validation is expected to succeed.
    """
    with handler(**handler_kwargs) as rh:
        if not error:
            rh.validate(req)
            return
        with pytest.raises(error):
            rh.validate(req)
814
815
class TestRequestHandlerValidation:
    """Tests for what RequestHandler.validate() accepts and rejects.

    The tables below are expanded into pytest parameters. Each expected
    outcome is either an exception type that validation must raise, or False
    when validation must succeed (see run_validation).
    """

    class ValidationRH(RequestHandler):
        """Handler used only for validation; sending always fails."""

        def _send(self, request):
            raise RequestError('test')

    class NoCheckRH(ValidationRH):
        """Handler with the feature, proxy scheme and URL scheme lists unset."""
        _SUPPORTED_FEATURES = None
        _SUPPORTED_PROXY_SCHEMES = None
        _SUPPORTED_URL_SCHEMES = None

        def _check_extensions(self, extensions):
            # Clearing the dict lets any extension through (see EXTENSION_TESTS)
            extensions.clear()

    class HTTPSupportedRH(ValidationRH):
        """Handler that declares support for the http URL scheme only."""
        _SUPPORTED_URL_SCHEMES = ('http',)

    URL_SCHEME_TESTS = [
        # scheme, expected error (False if validation should pass), handler kwargs
        ('Urllib', [
            ('http', False, {}),
            ('https', False, {}),
            ('data', False, {}),
            ('ftp', False, {}),
            ('file', UnsupportedRequest, {}),
            ('file', False, {'enable_file_urls': True}),
        ]),
        (NoCheckRH, [('http', False, {})]),
        (ValidationRH, [('http', UnsupportedRequest, {})])
    ]

    PROXY_SCHEME_TESTS = [
        # scheme, expected error (False if validation should pass)
        ('Urllib', [
            ('http', False),
            ('https', UnsupportedRequest),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
            ('socks', UnsupportedRequest),
        ]),
        (NoCheckRH, [('http', False)]),
        (HTTPSupportedRH, [('http', UnsupportedRequest)]),
    ]

    PROXY_KEY_TESTS = [
        # key, expected error (False if validation should pass)
        ('Urllib', [
            ('all', False),
            ('unrelated', False),
        ]),
        (NoCheckRH, [('all', False)]),
        (HTTPSupportedRH, [('all', UnsupportedRequest)]),
        (HTTPSupportedRH, [('no', UnsupportedRequest)]),
    ]

    EXTENSION_TESTS = [
        # extensions, expected error (False if validation should pass)
        ('Urllib', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': CookieJar()}, False),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        (NoCheckRH, [
            ({'cookiejar': 'notacookiejar'}, False),
            ({'somerandom': 'test'}, False),  # but any extension is allowed through
        ]),
    ]

    @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
        (handler_tests[0], scheme, fail, handler_kwargs)
        for handler_tests in URL_SCHEME_TESTS
        for scheme, fail, handler_kwargs in handler_tests[1]
    ], indirect=['handler'])
    def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
        """Validate a request whose URL uses the given scheme."""
        run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))

    @pytest.mark.parametrize('handler,fail', [('Urllib', False)], indirect=['handler'])
    def test_no_proxy(self, handler, fail):
        """A 'no' proxy entry is accepted both on the request and on the handler."""
        run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
        run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})

    @pytest.mark.parametrize('handler,proxy_key,fail', [
        (handler_tests[0], proxy_key, fail)
        for handler_tests in PROXY_KEY_TESTS
        for proxy_key, fail in handler_tests[1]
    ], indirect=['handler'])
    def test_proxy_key(self, handler, proxy_key, fail):
        """Validate a proxy key given on the request and on the handler."""
        run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
        run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})

    @pytest.mark.parametrize('handler,scheme,fail', [
        (handler_tests[0], scheme, fail)
        for handler_tests in PROXY_SCHEME_TESTS
        for scheme, fail in handler_tests[1]
    ], indirect=['handler'])
    def test_proxy_scheme(self, handler, scheme, fail):
        """Validate a proxy URL scheme given on the request and on the handler."""
        run_validation(handler, fail, Request('http://', proxies={'http': f'{scheme}://example.com'}))
        run_validation(handler, fail, Request('http://'), proxies={'http': f'{scheme}://example.com'})

    @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH], indirect=True)
    def test_empty_proxy(self, handler):
        """A proxy value of None passes validation."""
        run_validation(handler, False, Request('http://', proxies={'http': None}))
        run_validation(handler, False, Request('http://'), proxies={'http': None})

    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1'])
    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_missing_proxy_scheme(self, handler, proxy_url):
        """A proxy URL without a scheme is rejected as an unsupported request."""
        # Use the parametrized URL so that every scheme-less form is actually exercised
        run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))

    @pytest.mark.parametrize('handler,extensions,fail', [
        (handler_tests[0], extensions, fail)
        for handler_tests in EXTENSION_TESTS
        for extensions, fail in handler_tests[1]
    ], indirect=['handler'])
    def test_extension(self, handler, extensions, fail):
        """Validate a request carrying the given extensions."""
        run_validation(
            handler, fail, Request('http://', extensions=extensions))

    def test_invalid_request_type(self):
        """validate() and send() both raise TypeError for a non-Request argument."""
        rh = self.ValidationRH(logger=FakeLogger())
        for method in (rh.validate, rh.send):
            with pytest.raises(TypeError, match='Expected an instance of Request'):
                method('not a request')
943
944
class FakeResponse(Response):
    """Response with an empty body that keeps the request it was created for."""

    def __init__(self, request):
        # XXX: we could make request part of standard response interface
        self.request = request
        empty_body = io.BytesIO(b'')
        super().__init__(fp=empty_body, headers={}, url=request.url)
950
951
class FakeRH(RequestHandler):
    """Request handler stub that accepts every request and answers with a FakeResponse."""

    def _validate(self, request):
        # Nothing is checked: every request is accepted
        return None

    def _send(self, request: Request):
        ssl_prefix = 'ssl://'
        if not request.url.startswith(ssl_prefix):
            return FakeResponse(request)
        # The part of the URL after the "ssl://" prefix becomes the SSLError message
        raise SSLError(request.url[len(ssl_prefix):])
961
962
class FakeRHYDL(FakeYDL):
    """FakeYDL whose request director is built from FakeRH only."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        handler_classes = [FakeRH]
        self._request_director = self.build_request_director(handler_classes)
967
968
class TestRequestDirector:
    """Tests for how RequestDirector stores handlers and dispatches requests to them."""

    def test_handler_operations(self):
        """Handlers are stored under their RH_KEY; adding the same key again replaces the entry."""
        director = RequestDirector(logger=FakeLogger())
        handler = FakeRH(logger=FakeLogger())
        director.add_handler(handler)
        assert director.handlers.get(FakeRH.RH_KEY) is handler

        # Handler should overwrite
        handler2 = FakeRH(logger=FakeLogger())
        director.add_handler(handler2)
        assert director.handlers.get(FakeRH.RH_KEY) is not handler
        assert director.handlers.get(FakeRH.RH_KEY) is handler2
        assert len(director.handlers) == 1

        class AnotherFakeRH(FakeRH):
            pass
        director.add_handler(AnotherFakeRH(logger=FakeLogger()))
        assert len(director.handlers) == 2
        assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY

        director.handlers.pop(FakeRH.RH_KEY, None)
        assert director.handlers.get(FakeRH.RH_KEY) is None
        assert len(director.handlers) == 1

        # RequestErrors should passthrough
        with pytest.raises(SSLError):
            director.send(Request('ssl://something'))

    def test_send(self):
        """Sending without any handler raises RequestError; with FakeRH it returns a FakeResponse."""
        director = RequestDirector(logger=FakeLogger())
        with pytest.raises(RequestError):
            director.send(Request('any://'))
        director.add_handler(FakeRH(logger=FakeLogger()))
        assert isinstance(director.send(Request('http://')), FakeResponse)

    def test_unsupported_handlers(self):
        """Requests a handler does not support fall through to another handler, or fail if none is left."""
        director = RequestDirector(logger=FakeLogger())
        director.add_handler(FakeRH(logger=FakeLogger()))

        class SupportedRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        # This handler should by default take preference over FakeRH
        director.add_handler(SupportedRH(logger=FakeLogger()))
        assert director.send(Request('http://')).read() == b'supported'
        assert director.send(Request('any://')).read() == b''

        director.handlers.pop(FakeRH.RH_KEY)
        with pytest.raises(NoSupportingHandlers):
            director.send(Request('any://'))

    def test_unexpected_error(self):
        """An unexpected handler exception is reported via NoSupportingHandlers and is not fatal."""
        director = RequestDirector(logger=FakeLogger())

        class UnexpectedRH(FakeRH):
            def _send(self, request: Request):
                raise TypeError('something')

        # Pass a logger instance (not the class), as every other handler here does
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
            director.send(Request('any://'))

        director.handlers.clear()
        assert len(director.handlers) == 0

        # Should not be fatal
        director.add_handler(FakeRH(logger=FakeLogger()))
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        assert director.send(Request('any://'))
1042
1043
1044# XXX: do we want to move this to test_YoutubeDL.py?
class TestYoutubeDLNetworking:
    """Tests for how YoutubeDL params and requests are handed to request handlers."""

    @staticmethod
    def build_handler(ydl, handler: RequestHandler = FakeRH):
        """Build a request director from `handler` alone and return the resulting handler instance."""
        return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)

    @staticmethod
    def _restore_env(key, old_value):
        """Set environment variable `key` back to `old_value`, removing it if it was unset."""
        if old_value is None:
            # The variable did not exist before the test; do not leak the test value
            os.environ.pop(key, None)
        else:
            os.environ[key] = old_value

    def test_compat_opener(self):
        with FakeYDL() as ydl:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                assert isinstance(ydl._opener, urllib.request.OpenerDirector)

    @pytest.mark.parametrize('proxy,expected', [
        ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
        ('', {'all': '__noproxy__'}),
        (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'})  # env, set https
    ])
    def test_proxy(self, proxy, expected):
        old_http_proxy = os.environ.get('HTTP_PROXY')
        try:
            os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081'  # ensure that provided proxies override env
            with FakeYDL({'proxy': proxy}) as ydl:
                assert ydl.proxies == expected
        finally:
            self._restore_env('HTTP_PROXY', old_http_proxy)

    def test_compat_request(self):
        with FakeRHYDL() as ydl:
            assert ydl.urlopen('test://')
            urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
            urllib_req.add_unredirected_header('Cookie', 'bob=bob')
            urllib_req.timeout = 2
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                req = ydl.urlopen(urllib_req).request
                assert req.url == urllib_req.get_full_url()
                assert req.data == urllib_req.data
                assert req.method == urllib_req.get_method()
                assert 'X-Test' in req.headers
                assert 'Cookie' in req.headers
                assert req.extensions.get('timeout') == 2

            with pytest.raises(AssertionError):
                ydl.urlopen(None)

    def test_extract_basic_auth(self):
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('http://user:pass@foo.bar'))
            assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'

    def test_sanitize_url(self):
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('httpss://foo.bar'))
            assert res.request.url == 'https://foo.bar'

    def test_file_urls_error(self):
        # use urllib handler
        with FakeYDL() as ydl:
            with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
                ydl.urlopen('file://')

    def test_legacy_server_connect_error(self):
        with FakeRHYDL() as ydl:
            for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
                    ydl.urlopen(f'ssl://{error}')

            with pytest.raises(SSLError, match='testerror'):
                ydl.urlopen('ssl://testerror')

    @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
        ('http', '__noproxy__', None),
        ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
        ('https', 'example.com', 'http://example.com'),
        ('https', 'socks5://example.com', 'socks5h://example.com'),
        ('http', 'socks://example.com', 'socks4://example.com'),
        ('http', 'socks4://example.com', 'socks4://example.com'),
    ])
    def test_clean_proxy(self, proxy_key, proxy_url, expected):
        # proxies should be cleaned in urlopen()
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
            assert req.proxies[proxy_key] == expected

        # and should also be cleaned when building the handler
        env_key = f'{proxy_key.upper()}_PROXY'
        old_env_proxy = os.environ.get(env_key)
        try:
            os.environ[env_key] = proxy_url  # ensure that provided proxies override env
            with FakeYDL() as ydl:
                rh = self.build_handler(ydl)
                assert rh.proxies[proxy_key] == expected
        finally:
            self._restore_env(env_key, old_env_proxy)

    def test_clean_proxy_header(self):
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
            assert 'ytdl-request-proxy' not in req.headers
            assert req.proxies == {'all': 'http://foo.bar'}

        with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'ytdl-request-proxy' not in rh.headers
            assert rh.proxies == {'all': 'http://foo.bar'}

    def test_clean_header(self):
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
            assert 'Youtubedl-no-compression' not in res.request.headers
            assert res.request.headers.get('Accept-Encoding') == 'identity'

        with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'Youtubedl-no-compression' not in rh.headers
            assert rh.headers.get('Accept-Encoding') == 'identity'

    def test_build_handler_params(self):
        with FakeYDL({
            'http_headers': {'test': 'testtest'},
            'socket_timeout': 2,
            'proxy': 'http://127.0.0.1:8080',
            'source_address': '127.0.0.45',
            'debug_printtraffic': True,
            'compat_opts': ['no-certifi'],
            'nocheckcertificate': True,
            'legacyserverconnect': True,
        }) as ydl:
            rh = self.build_handler(ydl)
            assert rh.headers.get('test') == 'testtest'
            assert 'Accept' in rh.headers  # ensure std_headers are still there
            assert rh.timeout == 2
            assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
            assert rh.source_address == '127.0.0.45'
            assert rh.verbose is True
            assert rh.prefer_system_certs is True
            assert rh.verify is False
            assert rh.legacy_ssl_support is True

    @pytest.mark.parametrize('ydl_params', [
        {'client_certificate': 'fakecert.crt'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
        {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
    ])
    def test_client_certificate(self, ydl_params):
        with FakeYDL(ydl_params) as ydl:
            rh = self.build_handler(ydl)
            assert rh._client_cert == ydl_params  # XXX: Too bound to implementation

    def test_urllib_file_urls(self):
        with FakeYDL({'enable_file_urls': False}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is False

        with FakeYDL({'enable_file_urls': True}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is True
1205
1206
class TestRequest:
    """Tests for the attributes and helper methods of Request."""

    def test_query(self):
        """Query parameters are merged into the URL on creation and on update()."""
        request = Request('http://example.com?q=something', query={'v': 'xyz'})
        assert request.url == 'http://example.com?q=something&v=xyz'

        request.update(query={'v': '123'})
        assert request.url == 'http://example.com?q=something&v=123'
        request.update(url='http://example.com', query={'v': 'xyz'})
        assert request.url == 'http://example.com?v=xyz'

    def test_method(self):
        """The method follows the presence of data until it is set explicitly."""
        request = Request('http://example.com')
        assert request.method == 'GET'
        request.data = b'test'
        assert request.method == 'POST'
        request.data = None
        assert request.method == 'GET'

        # Once set explicitly, the method no longer depends on the data
        request.data = b'test2'
        request.method = 'PUT'
        assert request.method == 'PUT'
        request.data = None
        assert request.method == 'PUT'

        with pytest.raises(TypeError):
            request.method = 1

    def test_request_helpers(self):
        """HEADRequest and PUTRequest report their respective methods."""
        head_request = HEADRequest('http://example.com')
        put_request = PUTRequest('http://example.com')
        assert head_request.method == 'HEAD'
        assert put_request.method == 'PUT'

    def test_headers(self):
        """Headers are held in an HTTPHeaderDict, however they were supplied."""
        request = Request('http://example.com', headers={'tesT': 'test'})
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        request.update(headers={'teSt2': 'test2'})
        assert request.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})

        header_dict = HTTPHeaderDict({'test': 'test'})
        request.headers = header_dict
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        assert request.headers is header_dict

        # test converts dict to case insensitive dict
        plain_dict = {'test2': 'test2'}
        request.headers = plain_dict
        assert isinstance(request.headers, HTTPHeaderDict)
        assert request.headers is not plain_dict

        with pytest.raises(TypeError):
            request.headers = None

    def test_data_type(self):
        """Bytes, iterables of bytes and file-like objects are accepted as data; str and dict are not."""
        request = Request('http://example.com')
        assert request.data is None

        # test bytes is allowed
        request.data = b'test'
        assert request.data == b'test'

        # test iterable of bytes is allowed
        chunks = [b'test', b'test2']
        request.data = chunks
        assert request.data == chunks

        # test file-like object is allowed
        stream = io.BytesIO(b'test')
        request.data = stream
        assert request.data == stream

        # common mistake: test str not allowed
        with pytest.raises(TypeError):
            request.data = 'test'
        assert request.data != 'test'

        # common mistake: test dict is not allowed
        with pytest.raises(TypeError):
            request.data = {'test': 'test'}
        assert request.data != {'test': 'test'}

    def test_content_length_header(self):
        """Content-Length is dropped when the data changes or when there is no data."""
        request = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
        assert request.headers.get('Content-Length') == '0'

        request.data = b'test'
        assert 'Content-Length' not in request.headers

        request = Request('http://example.com', headers={'Content-Length': '10'})
        assert 'Content-Length' not in request.headers

    def test_content_type_header(self):
        """Content-Type is kept across data changes, removed with the data and defaulted for new data."""
        request = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
        assert request.headers.get('Content-Type') == 'test'
        request.data = b'test2'
        assert request.headers.get('Content-Type') == 'test'
        request.data = None
        assert 'Content-Type' not in request.headers
        request.data = b'test3'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_update_req(self):
        """update(data=b'') turns a GET request into a POST with the default Content-Type."""
        request = Request('http://example.com')
        assert request.data is None
        assert request.method == 'GET'
        assert 'Content-Type' not in request.headers

        # Test that zero-byte payloads will be sent
        request.update(data=b'')
        assert request.data == b''
        assert request.method == 'POST'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_proxies(self):
        """Proxies passed to the constructor are available unchanged."""
        proxy_map = {'http': 'http://127.0.0.1:8080'}
        request = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'})
        assert request.proxies == proxy_map

    def test_extensions(self):
        """Extensions passed to the constructor are available unchanged."""
        request = Request(url='http://example.com', extensions={'timeout': 2})
        assert request.extensions == {'timeout': 2}

    def test_copy(self):
        """copy() duplicates the headers, proxies and extensions containers but shares the data."""
        original = Request(
            url='http://example.com',
            extensions={'cookiejar': CookieJar()},
            headers={'Accept-Encoding': 'br'},
            proxies={'http': 'http://127.0.0.1'},
            data=[b'123']
        )
        duplicate = original.copy()
        assert duplicate is not original
        assert duplicate.url == original.url
        assert duplicate.headers == original.headers
        assert duplicate.headers is not original.headers
        assert duplicate.proxies == original.proxies
        assert duplicate.proxies is not original.proxies

        # Data is not able to be copied
        assert duplicate.data == original.data
        assert duplicate.data is original.data

        # Shallow copy extensions
        assert duplicate.extensions is not original.extensions
        assert duplicate.extensions['cookiejar'] == original.extensions['cookiejar']

        # Subclasses are copied by default
        class AnotherRequest(Request):
            pass

        subclass_request = AnotherRequest(url='http://127.0.0.1')
        assert isinstance(subclass_request.copy(), AnotherRequest)

    def test_url(self):
        """URLs are escaped, scheme-less URLs get http, and a None URL is rejected."""
        request = Request(url='https://фtest.example.com/ some spaceв?ä=c',)
        assert request.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'

        scheme_less = Request(url='//example.com')
        assert scheme_less.url == 'http://example.com'

        with pytest.raises(TypeError):
            Request(url='https://').url = None
1359
1360
class TestResponse:
    """Tests for the Response wrapper: reason, headers and the deprecated compat methods."""

    @pytest.mark.parametrize('reason,status,expected', [
        ('custom', 200, 'custom'),
        (None, 404, 'Not Found'),  # fallback status
        ('', 403, 'Forbidden'),
        (None, 999, None)
    ])
    def test_reason(self, reason, status, expected):
        """Check the reason reported for the given reason/status combination."""
        response = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
        assert response.reason == expected

    def test_headers(self):
        """Repeated headers are all kept, and header lookup ignores case."""
        message = Message()
        for name, value in (('Test', 'test'), ('Test', 'test2'), ('content-encoding', 'br')):
            message.add_header(name, value)
        response = Response(io.BytesIO(b''), headers=message, url='test://')
        assert response.headers.get_all('test') == ['test', 'test2']
        assert 'Content-Encoding' in response.headers

    def test_get_header(self):
        """get_header() joins repeated values, except Set-Cookie where only the first is returned."""
        message = Message()
        message.add_header('Set-Cookie', 'cookie1')
        message.add_header('Set-cookie', 'cookie2')
        message.add_header('Test', 'test')
        message.add_header('Test', 'test2')
        response = Response(io.BytesIO(b''), headers=message, url='test://')
        assert response.get_header('test') == 'test, test2'
        assert response.get_header('set-Cookie') == 'cookie1'
        assert response.get_header('notexist', 'default') == 'default'

    def test_compat(self):
        """The legacy accessors agree with their current counterparts."""
        response = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
        with warnings.catch_warnings():
            # The legacy accessors may emit DeprecationWarning; silence it here
            warnings.simplefilter('ignore', category=DeprecationWarning)
            assert response.code == response.getcode() == response.status
            assert response.geturl() == response.url
            assert response.info() is response.headers
            assert response.getheader('test') == response.get_header('test')