]> jfr.im git - yt-dlp.git/blame - test/test_networking.py
Fix `--check-formats`
[yt-dlp.git] / test / test_networking.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
54007a45 2
83fda3c0
PH
3# Allow direct execution
4import os
5import sys
227bf1a3 6
7import pytest
f8271158 8
83fda3c0
PH
9sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
227bf1a3 11import functools
08916a49 12import gzip
227bf1a3 13import http.client
08916a49 14import http.cookiejar
54007a45 15import http.server
227bf1a3 16import inspect
08916a49 17import io
18import pathlib
227bf1a3 19import random
f8271158 20import ssl
08916a49 21import tempfile
f8271158 22import threading
227bf1a3 23import time
08916a49 24import urllib.error
ac668111 25import urllib.request
227bf1a3 26import warnings
daafbf49 27import zlib
227bf1a3 28from email.message import Message
29from http.cookiejar import CookieJar
f8271158 30
227bf1a3 31from test.helper import FakeYDL, http_server_port
daafbf49 32from yt_dlp.dependencies import brotli
227bf1a3 33from yt_dlp.networking import (
34 HEADRequest,
35 PUTRequest,
36 Request,
37 RequestDirector,
38 RequestHandler,
39 Response,
40)
41from yt_dlp.networking._urllib import UrllibRH
42from yt_dlp.networking.common import _REQUEST_HANDLERS
43from yt_dlp.networking.exceptions import (
44 CertificateVerifyError,
45 HTTPError,
46 IncompleteRead,
47 NoSupportingHandlers,
48 RequestError,
49 SSLError,
50 TransportError,
51 UnsupportedRequest,
52)
53from yt_dlp.utils._utils import _YDLLogger as FakeLogger
54from yt_dlp.utils.networking import HTTPHeaderDict
83fda3c0
PH
55
56TEST_DIR = os.path.dirname(os.path.abspath(__file__))
57
03d8d4df 58
227bf1a3 59def _build_proxy_handler(name):
60 class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
61 proxy_name = name
62
63 def log_message(self, format, *args):
64 pass
65
66 def do_GET(self):
67 self.send_response(200)
68 self.send_header('Content-Type', 'text/plain; charset=utf-8')
69 self.end_headers()
70 self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode())
71 return HTTPTestRequestHandler
72
73
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    """Request handler backing the local HTTP/HTTPS servers used by the tests.

    The behaviour of a request is selected purely by its path, e.g. ``/headers``
    echoes the request headers, ``/method`` reports the request method in a
    ``Method`` response header, ``/redirect_<status>`` redirects to ``/method``
    and ``/gen_<status>`` answers with the given status code.
    """
    protocol_version = 'HTTP/1.1'

    def log_message(self, format, *args):
        # Silence the default per-request logging
        pass

    def _send_payload(self, payload, content_type='text/html; charset=utf-8', status=200):
        """Send a complete response: status line, Content-Type, Content-Length and body."""
        self.send_response(status)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _send_redirect(self, status, location):
        """Send a body-less response with *status* and a ``Location`` header."""
        self.send_response(status)
        self.send_header('Location', location)
        self.send_header('Content-Length', '0')
        self.end_headers()

    def _headers(self):
        """Echo the received request headers back as the response body."""
        self._send_payload(str(self.headers).encode(), content_type='application/json')

    def _redirect(self):
        """Answer ``/redirect_<status>`` with that status, pointing at ``/method``."""
        self._send_redirect(int(self.path[len('/redirect_'):]), '/method')

    def _method(self, method, payload=None):
        """Report *method* in a ``Method`` response header, with *payload* (if any) as body."""
        self.send_response(200)
        self.send_header('Content-Length', str(len(payload or '')))
        self.send_header('Method', method)
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def _status(self, status):
        """Answer with *status* and a small HTML body."""
        # The body text always reads "NOT FOUND", whatever *status* is
        payload = f'<html>{status} NOT FOUND</html>'.encode()
        self._send_payload(payload, status=int(status))

    def _read_data(self):
        """Return the request body, or ``b''`` when no Content-Length was sent.

        Always returning bytes keeps ``do_POST``/``do_PUT`` from failing with a
        ``TypeError`` when they append the encoded headers to the result.
        """
        if 'Content-Length' in self.headers:
            return self.rfile.read(int(self.headers['Content-Length']))
        return b''

    def do_POST(self):
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('POST', data)
        elif self.path.startswith('/headers'):
            self._headers()
        else:
            self._status(404)

    def do_HEAD(self):
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('HEAD')
        else:
            self._status(404)

    def do_PUT(self):
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('PUT', data)
        else:
            self._status(404)

    def do_GET(self):
        # NOTE: the order of the branches matters; the specific '/redirect_*' paths
        # must be matched before the generic '/redirect_' prefix
        if self.path in ('/video.html', '/%E4%B8%AD%E6%96%87.html', '/%c7%9f'):
            self._send_payload(b'<html><video src="/vid.mp4" /></html>')
        elif self.path == '/vid.mp4':
            self._send_payload(b'\x00\x00\x00\x00\x20\x66\x74[video]', content_type='video/mp4')
        elif self.path.startswith('/redirect_loop'):
            # Redirects to itself forever
            self._send_redirect(301, self.path)
        elif self.path == '/redirect_dotsegments':
            # redirect to /headers but with dot segments before
            self._send_redirect(301, '/a/b/./../../headers')
        elif self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('GET', str(self.headers).encode())
        elif self.path.startswith('/headers'):
            self._headers()
        elif self.path.startswith('/308-to-headers'):
            self._send_redirect(308, '/headers')
        elif self.path == '/trailing_garbage':
            payload = b'<html><video src="/vid.mp4" /></html>'
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.send_header('Content-Encoding', 'gzip')
            buf = io.BytesIO()
            with gzip.GzipFile(fileobj=buf, mode='wb') as f:
                f.write(payload)
            # Valid gzip stream followed by bytes that are not part of it
            compressed = buf.getvalue() + b'trailing garbage'
            self.send_header('Content-Length', str(len(compressed)))
            self.end_headers()
            self.wfile.write(compressed)
        elif self.path == '/302-non-ascii-redirect':
            # NOTE: despite the path name, the status sent is 301
            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
            self._send_redirect(301, new_url)
        elif self.path == '/content-encoding':
            # The encodings to apply are requested through the 'ytdl-encoding' request
            # header and are applied to the payload in the order listed
            encodings = self.headers.get('ytdl-encoding', '')
            payload = b'<html><video src="/vid.mp4" /></html>'
            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
                if encoding == 'br' and brotli:
                    payload = brotli.compress(payload)
                elif encoding == 'gzip':
                    buf = io.BytesIO()
                    with gzip.GzipFile(fileobj=buf, mode='wb') as f:
                        f.write(payload)
                    payload = buf.getvalue()
                elif encoding == 'deflate':
                    payload = zlib.compress(payload)
                elif encoding == 'unsupported':
                    payload = b'raw'
                    break
                else:
                    self._status(415)
                    return
            self.send_response(200)
            self.send_header('Content-Encoding', encodings)
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)
        elif self.path.startswith('/gen_'):
            self._send_payload(b'<html></html>', status=int(self.path[len('/gen_'):]))
        elif self.path.startswith('/incompleteread'):
            payload = b'<html></html>'
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            # Deliberately announce far more data than is actually written
            self.send_header('Content-Length', '234234')
            self.end_headers()
            self.wfile.write(payload)
            self.finish()
        elif self.path.startswith('/timeout_'):
            # Delay the response by the number of seconds given in the path
            time.sleep(int(self.path[len('/timeout_'):]))
            self._headers()
        elif self.path == '/source_address':
            # Report the address the client connected from
            self._send_payload(str(self.client_address[0]).encode())
            self.finish()
        else:
            self._status(404)

    def send_header(self, keyword, value):
        """
        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
        This is against what is defined in RFC 3986, however we need to test we support this
        since some sites incorrectly do this.
        """
        if keyword.lower() == 'connection':
            return super().send_header(keyword, value)

        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []

        self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
83fda3c0
PH
277
278
def validate_and_send(rh, req):
    """Validate *req* with the request handler *rh*, then send it and return the result."""
    rh.validate(req)
    response = rh.send(req)
    return response
83fda3c0 282
83fda3c0 283
class TestRequestHandlerBase:
    """Base for test classes that need the local HTTP and HTTPS test servers."""

    @classmethod
    def setup_class(cls):
        """Start one plain HTTP and one HTTPS server on OS-assigned ports of 127.0.0.1."""
        bind_address = ('127.0.0.1', 0)

        # HTTP server
        cls.http_httpd = http.server.ThreadingHTTPServer(bind_address, HTTPTestRequestHandler)
        cls.http_port = http_server_port(cls.http_httpd)
        # FIXME: we should probably stop the http server thread after each test
        # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
        cls.http_server_thread = threading.Thread(
            target=cls.http_httpd.serve_forever, daemon=True)
        cls.http_server_thread.start()

        # HTTPS server
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        cls.https_httpd = http.server.ThreadingHTTPServer(bind_address, HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        # Replace the listening socket with its TLS-wrapped counterpart
        cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
        cls.https_port = http_server_port(cls.https_httpd)
        cls.https_server_thread = threading.Thread(
            target=cls.https_httpd.serve_forever, daemon=True)
        cls.https_server_thread.start()
307
308
@pytest.fixture
def handler(request):
    """Resolve the parametrized request handler and bind the fake logger to it.

    ``request.param`` is either a ``RequestHandler`` subclass or a key of
    ``_REQUEST_HANDLERS``; the test is skipped when the key is not registered.
    """
    rh_key = request.param
    if inspect.isclass(rh_key) and issubclass(rh_key, RequestHandler):
        rh_class = rh_key
    elif rh_key not in _REQUEST_HANDLERS:
        pytest.skip(f'{rh_key} request handler is not available')
    else:
        rh_class = _REQUEST_HANDLERS[rh_key]

    return functools.partial(rh_class, logger=FakeLogger)
320
321
class TestHTTPRequestHandler(TestRequestHandlerBase):
    """HTTP(S) behaviour tests, run against the local test servers."""

    @staticmethod
    def _read_body(rh, req):
        """Validate and send *req* via *rh*, return the whole body and close the response."""
        res = validate_and_send(rh, req)
        try:
            return res.read()
        finally:
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert(self, handler):
        """Certificate verification fails by default and can be disabled with verify=False."""
        with handler() as rh:
            with pytest.raises(CertificateVerifyError):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

        with handler(verify=False) as rh:
            r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
            assert r.status == 200
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_ssl_error(self, handler):
        """A failed TLS handshake surfaces as SSLError, not as CertificateVerifyError."""
        # HTTPS server with too old TLS version
        # XXX: is there a better way to test this than to create a new server?
        # NOTE(review): no certificate chain is loaded into this context; presumably
        # that is what actually makes the handshake fail - confirm
        https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
        https_port = http_server_port(https_httpd)
        https_server_thread = threading.Thread(target=https_httpd.serve_forever)
        https_server_thread.daemon = True
        https_server_thread.start()

        with handler(verify=False) as rh:
            with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
                validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
            assert not issubclass(exc_info.type, CertificateVerifyError)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_percent_encode(self, handler):
        with handler() as rh:
            # Unicode characters should be encoded with uppercase percent-encoding
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
            assert res.status == 200
            res.close()
            # don't normalize existing percent encodings
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
            assert res.status == 200
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_remove_dot_segments(self, handler):
        with handler() as rh:
            # This isn't a comprehensive test,
            # but it should be enough to check whether the handler is removing dot segments
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
            assert res.status == 200
            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
            res.close()

            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
            assert res.status == 200
            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_unicode_path_redirection(self, handler):
        """A redirect to a non-ASCII path ends up at the percent-encoded URL."""
        with handler() as rh:
            r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
            assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_raise_http_error(self, handler):
        with handler() as rh:
            for bad_status in (400, 500, 599, 302):
                with pytest.raises(HTTPError):
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_{bad_status}'))

            # Should not raise an error
            validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')).close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_response_url(self, handler):
        with handler() as rh:
            # Response url should be that of the last url in redirect chain
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
            assert res.url == f'http://127.0.0.1:{self.http_port}/method'
            res.close()
            res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
            assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
            res2.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_redirect(self, handler):
        """Check which method and body are used for the request following a redirect."""
        with handler() as rh:
            def do_req(redirect_status, method, assert_no_content=False):
                # Returns (request body echoed by the server, method seen by the server)
                data = b'testdata' if method in ('POST', 'PUT') else None
                res = validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))

                # The server echoes "<request body><request headers>"; if the body did not
                # survive the redirect, what was read belongs to the headers instead
                headers = b''
                data_sent = b''
                if data is not None:
                    data_sent += res.read(len(data))
                    if data_sent != data:
                        headers += data_sent
                        data_sent = b''

                headers += res.read()
                res.close()

                if assert_no_content or data is None:
                    assert b'Content-Type' not in headers
                    assert b'Content-Length' not in headers
                else:
                    assert b'Content-Type' in headers
                    assert b'Content-Length' in headers

                return data_sent.decode(), res.headers.get('method', '')

            # A 303 must either use GET or HEAD for subsequent request
            assert do_req(303, 'POST', True) == ('', 'GET')
            assert do_req(303, 'HEAD') == ('', 'HEAD')

            assert do_req(303, 'PUT', True) == ('', 'GET')

            # 301 and 302 turn POST only into a GET
            assert do_req(301, 'POST', True) == ('', 'GET')
            assert do_req(301, 'HEAD') == ('', 'HEAD')
            assert do_req(302, 'POST', True) == ('', 'GET')
            assert do_req(302, 'HEAD') == ('', 'HEAD')

            assert do_req(301, 'PUT') == ('testdata', 'PUT')
            assert do_req(302, 'PUT') == ('testdata', 'PUT')

            # 307 and 308 should not change method
            for m in ('POST', 'PUT'):
                assert do_req(307, m) == ('testdata', m)
                assert do_req(308, m) == ('testdata', m)

            assert do_req(307, 'HEAD') == ('', 'HEAD')
            assert do_req(308, 'HEAD') == ('', 'HEAD')

            # These should not redirect and instead raise an HTTPError
            for code in (300, 304, 305, 306):
                with pytest.raises(HTTPError):
                    do_req(code, 'GET')

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_request_cookie_header(self, handler):
        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
        with handler() as rh:
            # Specified Cookie header should be used
            res = self._read_body(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/headers',
                    headers={'Cookie': 'test=test'})).decode()
            assert 'Cookie: test=test' in res

            # Specified Cookie header should be removed on any redirect
            res = self._read_body(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/308-to-headers',
                    headers={'Cookie': 'test=test'})).decode()
            assert 'Cookie: test=test' not in res

        # Specified Cookie header should override global cookiejar for that request
        cookiejar = http.cookiejar.CookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp', port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
            path_specified=True, secure=False, expires=None, discard=False, comment=None,
            comment_url=None, rest={}))

        with handler(cookiejar=cookiejar) as rh:
            data = self._read_body(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'}))
            assert b'Cookie: test=ytdlp' not in data
            assert b'Cookie: test=test' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_redirect_loop(self, handler):
        with handler() as rh:
            with pytest.raises(HTTPError, match='redirect loop'):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_incompleteread(self, handler):
        """A body shorter than the announced Content-Length raises IncompleteRead."""
        with handler(timeout=2) as rh:
            with pytest.raises(IncompleteRead):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_cookies(self, handler):
        cookiejar = http.cookiejar.CookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp', port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/headers',
            path_specified=True, secure=False, expires=None, discard=False, comment=None,
            comment_url=None, rest={}))

        # Handler-wide cookiejar
        with handler(cookiejar=cookiejar) as rh:
            data = self._read_body(rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
            assert b'Cookie: test=ytdlp' in data

        # Per request
        with handler() as rh:
            data = self._read_body(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar}))
            assert b'Cookie: test=ytdlp' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_headers(self, handler):

        with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
            # Global Headers
            data = self._read_body(rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
            assert b'Test1: test' in data

            # Per request headers, merged with global
            data = self._read_body(rh, Request(
                f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'}))
            assert b'Test1: test' in data
            assert b'Test2: changed' in data
            assert b'Test2: test2' not in data
            assert b'Test3: test3' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_timeout(self, handler):
        with handler() as rh:
            # Default timeout is 20 seconds, so this should go through
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3')).close()

        with handler(timeout=0.5) as rh:
            with pytest.raises(TransportError):
                validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))

            # Per request timeout, should override handler timeout
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4})).close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_source_address(self, handler):
        """The server must see the request coming from the configured source address."""
        source_address = f'127.0.0.{random.randint(5, 255)}'
        with handler(source_address=source_address) as rh:
            data = self._read_body(
                rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).decode()
            assert source_address == data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_gzip_trailing_garbage(self, handler):
        """Bytes following a valid gzip stream are ignored when decoding the body."""
        with handler() as rh:
            data = self._read_body(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).decode()
            assert data == '<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
    def test_brotli(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'br'}))
            assert res.headers.get('Content-Encoding') == 'br'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_deflate(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'deflate'}))
            assert res.headers.get('Content-Encoding') == 'deflate'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_gzip(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'gzip'}))
            assert res.headers.get('Content-Encoding') == 'gzip'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_multiple_encodings(self, handler):
        with handler() as rh:
            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
                res = validate_and_send(
                    rh, Request(
                        f'http://127.0.0.1:{self.http_port}/content-encoding',
                        headers={'ytdl-encoding': pair}))
                assert res.headers.get('Content-Encoding') == pair
                assert res.read() == b'<html><video src="/vid.mp4" /></html>'
                res.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_unsupported_encoding(self, handler):
        """A body with an unknown Content-Encoding is passed through untouched."""
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'unsupported'}))
            assert res.headers.get('Content-Encoding') == 'unsupported'
            assert res.read() == b'raw'
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_read(self, handler):
        """Partial reads return consecutive slices of the body."""
        with handler() as rh:
            res = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
            assert res.readable()
            assert res.read(1) == b'H'
            assert res.read(3) == b'ost'
            res.close()
629
630
class TestHTTPProxy(TestRequestHandlerBase):
    """Proxy selection tests using two fake local proxies labelled 'normal' and 'geo'."""

    @classmethod
    def setup_class(cls):
        """Start the base HTTP/HTTPS servers plus the two fake proxy servers."""
        super().setup_class()
        bind_address = ('127.0.0.1', 0)

        # HTTP Proxy server
        cls.proxy = http.server.ThreadingHTTPServer(bind_address, _build_proxy_handler('normal'))
        cls.proxy_port = http_server_port(cls.proxy)
        cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever, daemon=True)
        cls.proxy_thread.start()

        # Geo proxy server
        cls.geo_proxy = http.server.ThreadingHTTPServer(bind_address, _build_proxy_handler('geo'))
        cls.geo_port = http_server_port(cls.geo_proxy)
        cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever, daemon=True)
        cls.geo_proxy_thread.start()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_proxy(self, handler):
        """Cover the handler-wide proxy, a per-request override and per-request disabling."""
        normal_proxy_url = f'http://127.0.0.1:{self.proxy_port}'
        geo_proxy_url = f'http://127.0.0.1:{self.geo_port}'
        url = 'http://foo.com/bar'

        # Global HTTP proxy
        with handler(proxies={'http': normal_proxy_url}) as rh:
            body = validate_and_send(rh, Request(url)).read().decode()
            assert body == f'normal: {url}'

            # Per request proxy overrides global
            geo_request = Request(url, proxies={'http': geo_proxy_url})
            body = validate_and_send(rh, geo_request).read().decode()
            assert body == f'geo: {url}'

            # and setting to None disables all proxies for that request
            real_url = f'http://127.0.0.1:{self.http_port}/headers'
            direct_request = Request(real_url, proxies={'http': None})
            body = validate_and_send(rh, direct_request).read().decode()
            assert body != f'normal: {real_url}'
            assert 'Accept' in body

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_noproxy(self, handler):
        """Requests to hosts matched by the per-request 'no' entry must not be proxied."""
        # NOTE(review): the handler-wide mapping uses the key 'proxy' rather than a URL
        # scheme such as 'http' - confirm this is intended
        with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
            target = f'http://127.0.0.1:{self.http_port}/headers'
            # NO_PROXY
            for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
                res = validate_and_send(rh, Request(target, proxies={'no': no_proxy}))
                nop_response = res.read().decode('utf-8')
                assert 'Accept' in nop_response

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_allproxy(self, handler):
        """The per-request 'all' proxy entry is used for a plain http URL."""
        url = 'http://foo.com/bar'
        with handler() as rh:
            proxied_request = Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})
            response = validate_and_send(rh, proxied_request).read().decode('utf-8')
            assert response == f'normal: {url}'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_proxy_with_idn(self, handler):
        """An internationalized host name reaches the proxy in its IDNA-encoded form."""
        proxies = {
            'http': f'http://127.0.0.1:{self.proxy_port}',
        }
        with handler(proxies=proxies) as rh:
            url = 'http://中文.tw/'
            # Sent without a prior validate() call
            response = rh.send(Request(url)).read().decode()
            # b'xn--fiq228c' is '中文'.encode('idna')
            assert response == 'normal: http://xn--fiq228c.tw/'
704
705
class TestClientCertificate:
    """Client certificate tests against an HTTPS server that requires a client certificate."""

    @classmethod
    def setup_class(cls):
        """Start an HTTPS server that only accepts clients verified against ``ca.crt``."""
        cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        cacertfn = os.path.join(cls.certdir, 'ca.crt')

        cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)

        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        # The server demands a client certificate and checks it against the test CA
        sslctx.verify_mode = ssl.CERT_REQUIRED
        sslctx.load_verify_locations(cafile=cacertfn)
        sslctx.load_cert_chain(certfn, None)
        cls.httpd.socket = sslctx.wrap_socket(cls.httpd.socket, server_side=True)

        cls.port = http_server_port(cls.httpd)
        cls.server_thread = threading.Thread(target=cls.httpd.serve_forever, daemon=True)
        cls.server_thread.start()

    def _run_test(self, handler, **handler_kwargs):
        """Fetch ``/video.html`` from the server using a handler built with *handler_kwargs*."""
        url = f'https://127.0.0.1:{self.port}/video.html'
        # Disable client-side validation of unacceptable self-signed testcert.pem
        # The test is of a check on the server side, so unaffected
        with handler(verify=False, **handler_kwargs) as rh:
            validate_and_send(rh, Request(url)).read().decode()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_combined_nopass(self, handler):
        """Certificate and unencrypted key in a single file."""
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_nocombined_nopass(self, handler):
        """Certificate and unencrypted key in separate files."""
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'client.key'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_combined_pass(self, handler):
        """Certificate and password-protected key in a single file."""
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_nocombined_pass(self, handler):
        """Certificate and password-protected key in separate files."""
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)
bb58c9ed 760
bb58c9ed 761
class TestUrllibRequestHandler(TestRequestHandlerBase):
    """Tests specific to the urllib based request handler."""

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_file_urls(self, handler):
        """file:// URLs are rejected unless the handler is created with enable_file_urls=True."""
        # See https://github.com/ytdl-org/youtube-dl/issues/8227
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tf.write(b'foobar')

        # Remove the temporary file even when one of the assertions below fails
        try:
            req = Request(pathlib.Path(tf.name).as_uri())
            with handler() as rh:
                with pytest.raises(UnsupportedRequest):
                    rh.validate(req)

                # Test that urllib never loaded FileHandler
                with pytest.raises(TransportError):
                    rh.send(req)

            with handler(enable_file_urls=True) as rh:
                res = validate_and_send(rh, req)
                try:
                    assert res.read() == b'foobar'
                finally:
                    # Close before unlinking so the file is no longer held open
                    res.close()
        finally:
            os.unlink(tf.name)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_error_returns_content(self, handler):
        # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
        def get_response():
            with handler() as rh:
                # headers url
                try:
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
                except HTTPError as e:
                    return e.response

        # The body must still be readable once the HTTPError itself is gone
        assert get_response().read() == b'<html></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert_error_text(self, handler):
        # Check the output of the error message
        with handler() as rh:
            with pytest.raises(
                CertificateVerifyError,
                match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
            ):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('req,match,version_check', [
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
        # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
        (
            Request('http://127.0.0.1', method='GET\n'),
            'method can\'t contain control characters',
            lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
        # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
        (
            Request('http://127.0.0. 1', method='GET'),
            'URL can\'t contain control characters',
            lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
        (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
    ])
    def test_httplib_validation_errors(self, handler, req, match, version_check):
        """Validation errors must surface as RequestError, not as TransportError.

        *version_check*, when given, returns True for Python versions lacking the
        validation in question; the test is skipped on those.
        """
        if version_check and version_check(sys.version_info):
            pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')

        with handler() as rh:
            with pytest.raises(RequestError, match=match) as exc_info:
                validate_and_send(rh, req)
            assert not isinstance(exc_info.value, TransportError)
835
836
def run_validation(handler, error, req, **handler_kwargs):
    """Build a handler from `handler_kwargs` and validate `req` with it.

    When `error` is truthy it is the exception type that `validate()` is
    expected to raise; otherwise validation is expected to succeed.
    """
    with handler(**handler_kwargs) as rh:
        if not error:
            rh.validate(req)
            return
        with pytest.raises(error):
            rh.validate(req)
844
845
class TestRequestHandlerValidation:
    """Table-driven checks of RequestHandler.validate() for URL schemes, proxies and extensions."""

    class ValidationRH(RequestHandler):
        # Bare handler used for validation only; sending always raises
        def _send(self, request):
            raise RequestError('test')

    class NoCheckRH(ValidationRH):
        # All _SUPPORTED_* attributes are None and extensions are emptied instead of checked
        _SUPPORTED_URL_SCHEMES = None
        _SUPPORTED_PROXY_SCHEMES = None
        _SUPPORTED_FEATURES = None

        def _check_extensions(self, extensions):
            extensions.clear()

    class HTTPSupportedRH(ValidationRH):
        _SUPPORTED_URL_SCHEMES = ('http',)

    # Each table row: (handler, [cases]). In every case the "fail" slot is the
    # expected exception type, or False when validation should pass.

    URL_SCHEME_TESTS = [
        # scheme, expected failure, handler kwargs
        ('Urllib', [
            ('http', False, {}),
            ('https', False, {}),
            ('data', False, {}),
            ('ftp', False, {}),
            ('file', UnsupportedRequest, {}),
            ('file', False, {'enable_file_urls': True}),
        ]),
        (NoCheckRH, [('http', False, {})]),
        (ValidationRH, [('http', UnsupportedRequest, {})])
    ]

    PROXY_SCHEME_TESTS = [
        # scheme, expected failure
        ('Urllib', [
            ('http', False),
            ('https', UnsupportedRequest),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
            ('socks', UnsupportedRequest),
        ]),
        (NoCheckRH, [('http', False)]),
        (HTTPSupportedRH, [('http', UnsupportedRequest)]),
    ]

    PROXY_KEY_TESTS = [
        # key, expected failure
        ('Urllib', [
            ('all', False),
            ('unrelated', False),
        ]),
        (NoCheckRH, [('all', False)]),
        (HTTPSupportedRH, [('all', UnsupportedRequest)]),
        (HTTPSupportedRH, [('no', UnsupportedRequest)]),
    ]

    EXTENSION_TESTS = [
        # extensions, expected failure
        ('Urllib', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': CookieJar()}, False),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        (NoCheckRH, [
            ({'cookiejar': 'notacookiejar'}, False),
            ({'somerandom': 'test'}, False),  # but any extension is allowed through
        ]),
    ]

    @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
        (rh_key, scheme, fail, handler_kwargs)
        for rh_key, cases in URL_SCHEME_TESTS
        for scheme, fail, handler_kwargs in cases
    ], indirect=['handler'])
    def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
        """Validate a bare '<scheme>://' URL against the handler."""
        request = Request(f'{scheme}://')
        run_validation(handler, fail, request, **(handler_kwargs or {}))

    @pytest.mark.parametrize('handler,fail', [('Urllib', False)], indirect=['handler'])
    def test_no_proxy(self, handler, fail):
        """A 'no' proxy entry is checked both on the request and on the handler."""
        no_proxy = '127.0.0.1,github.com'
        run_validation(handler, fail, Request('http://', proxies={'no': no_proxy}))
        run_validation(handler, fail, Request('http://'), proxies={'no': no_proxy})

    @pytest.mark.parametrize('handler,proxy_key,fail', [
        (rh_key, proxy_key, fail)
        for rh_key, cases in PROXY_KEY_TESTS
        for proxy_key, fail in cases
    ], indirect=['handler'])
    def test_proxy_key(self, handler, proxy_key, fail):
        """Proxy keys are checked both on the request and on the handler."""
        proxy_url = 'http://example.com'
        run_validation(handler, fail, Request('http://', proxies={proxy_key: proxy_url}))
        run_validation(handler, fail, Request('http://'), proxies={proxy_key: proxy_url})

    @pytest.mark.parametrize('handler,scheme,fail', [
        (rh_key, scheme, fail)
        for rh_key, cases in PROXY_SCHEME_TESTS
        for scheme, fail in cases
    ], indirect=['handler'])
    def test_proxy_scheme(self, handler, scheme, fail):
        """Proxy URL schemes are checked both on the request and on the handler."""
        proxy_url = f'{scheme}://example.com'
        run_validation(handler, fail, Request('http://', proxies={'http': proxy_url}))
        run_validation(handler, fail, Request('http://'), proxies={'http': proxy_url})

    @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH], indirect=True)
    def test_empty_proxy(self, handler):
        """A proxy value of None passes validation on the request and on the handler."""
        run_validation(handler, False, Request('http://', proxies={'http': None}))
        run_validation(handler, False, Request('http://'), proxies={'http': None})

    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_invalid_proxy_url(self, handler, proxy_url):
        """Each of the listed proxy URLs is rejected with UnsupportedRequest."""
        request = Request('http://', proxies={'http': proxy_url})
        run_validation(handler, UnsupportedRequest, request)

    @pytest.mark.parametrize('handler,extensions,fail', [
        (rh_key, extensions, fail)
        for rh_key, cases in EXTENSION_TESTS
        for extensions, fail in cases
    ], indirect=['handler'])
    def test_extension(self, handler, extensions, fail):
        """Validate a request carrying the given extensions dict."""
        request = Request('http://', extensions=extensions)
        run_validation(handler, fail, request)

    def test_invalid_request_type(self):
        """Both validate() and send() raise TypeError for a non-Request argument."""
        rh = self.ValidationRH(logger=FakeLogger())
        for entry_point in (rh.validate, rh.send):
            with pytest.raises(TypeError, match='Expected an instance of Request'):
                entry_point('not a request')
973
974
class FakeResponse(Response):
    """Empty-bodied Response that remembers the request it was created for."""

    def __init__(self, request):
        # XXX: we could make request part of standard response interface
        self.request = request
        empty_body = io.BytesIO(b'')
        super().__init__(fp=empty_body, headers={}, url=request.url)
980
981
class FakeRH(RequestHandler):
    """Handler that accepts every request and answers with a FakeResponse.

    URLs starting with 'ssl://' raise SSLError with the rest of the URL as the message.
    """

    def _validate(self, request):
        """Accept any request without checks."""
        return None

    def _send(self, request: Request):
        ssl_prefix = 'ssl://'
        url = request.url
        if url.startswith(ssl_prefix):
            raise SSLError(url[len(ssl_prefix):])
        return FakeResponse(request)
991
992
class FakeRHYDL(FakeYDL):
    """FakeYDL whose request director is built from FakeRH only."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        fake_only_director = self.build_request_director([FakeRH])
        self._request_director = fake_only_director
997
998
class TestRequestDirector:
    """Tests for RequestDirector handler registration and request dispatch."""

    def test_handler_operations(self):
        """Handlers are stored per RH_KEY: adding replaces, popping removes."""
        director = RequestDirector(logger=FakeLogger())
        handler = FakeRH(logger=FakeLogger())
        director.add_handler(handler)
        assert director.handlers.get(FakeRH.RH_KEY) is handler

        # Handler should overwrite
        handler2 = FakeRH(logger=FakeLogger())
        director.add_handler(handler2)
        assert director.handlers.get(FakeRH.RH_KEY) is not handler
        assert director.handlers.get(FakeRH.RH_KEY) is handler2
        assert len(director.handlers) == 1

        class AnotherFakeRH(FakeRH):
            pass
        director.add_handler(AnotherFakeRH(logger=FakeLogger()))
        assert len(director.handlers) == 2
        assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY

        director.handlers.pop(FakeRH.RH_KEY, None)
        assert director.handlers.get(FakeRH.RH_KEY) is None
        assert len(director.handlers) == 1

        # RequestErrors should passthrough
        with pytest.raises(SSLError):
            director.send(Request('ssl://something'))

    def test_send(self):
        """send() raises RequestError with no handlers and returns the handler's response otherwise."""
        director = RequestDirector(logger=FakeLogger())
        with pytest.raises(RequestError):
            director.send(Request('any://'))
        director.add_handler(FakeRH(logger=FakeLogger()))
        assert isinstance(director.send(Request('http://')), FakeResponse)

    def test_unsupported_handlers(self):
        """Requests fall through to another handler when one does not support them."""
        director = RequestDirector(logger=FakeLogger())
        director.add_handler(FakeRH(logger=FakeLogger()))

        class SupportedRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        # This handler should by default take preference over FakeRH
        director.add_handler(SupportedRH(logger=FakeLogger()))
        assert director.send(Request('http://')).read() == b'supported'
        assert director.send(Request('any://')).read() == b''

        director.handlers.pop(FakeRH.RH_KEY)
        with pytest.raises(NoSupportingHandlers):
            director.send(Request('any://'))

    def test_unexpected_error(self):
        """A non-RequestError raised by a handler is reported but is not fatal when another handler succeeds."""
        director = RequestDirector(logger=FakeLogger())

        class UnexpectedRH(FakeRH):
            def _send(self, request: Request):
                raise TypeError('something')

        # Pass a logger instance (not the FakeLogger class), as every other handler here does
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
            director.send(Request('any://'))

        director.handlers.clear()
        assert len(director.handlers) == 0

        # Should not be fatal
        director.add_handler(FakeRH(logger=FakeLogger()))
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        assert director.send(Request('any://'))
1072
1073
1074# XXX: do we want to move this to test_YoutubeDL.py?
class TestYoutubeDLNetworking:
    """Tests for how YoutubeDL parameters and requests are passed on to request handlers."""

    @staticmethod
    def build_handler(ydl, handler: RequestHandler = FakeRH):
        """Build a request director containing only `handler` and return that handler instance."""
        return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)

    @staticmethod
    def _restore_env(key, previous):
        """Put environment variable `key` back to `previous`, deleting it if it was unset (None)."""
        if previous is None:
            os.environ.pop(key, None)
        else:
            os.environ[key] = previous

    def test_compat_opener(self):
        """The deprecated `_opener` attribute is a urllib OpenerDirector."""
        with FakeYDL() as ydl:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                assert isinstance(ydl._opener, urllib.request.OpenerDirector)

    @pytest.mark.parametrize('proxy,expected', [
        ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
        ('', {'all': '__noproxy__'}),
        (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'})  # env, set https
    ])
    def test_proxy(self, proxy, expected):
        """The 'proxy' parameter takes precedence over HTTP_PROXY from the environment."""
        old_http_proxy = os.environ.get('HTTP_PROXY')
        try:
            os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081'  # ensure that provided proxies override env
            with FakeYDL({'proxy': proxy}) as ydl:
                assert ydl.proxies == expected
        finally:
            # Restore or remove the variable so it cannot leak into later tests
            self._restore_env('HTTP_PROXY', old_http_proxy)

    def test_compat_request(self):
        """urlopen() accepts a URL string and a urllib.request.Request, and rejects None."""
        with FakeRHYDL() as ydl:
            assert ydl.urlopen('test://')
            urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
            urllib_req.add_unredirected_header('Cookie', 'bob=bob')
            urllib_req.timeout = 2
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                req = ydl.urlopen(urllib_req).request
                assert req.url == urllib_req.get_full_url()
                assert req.data == urllib_req.data
                assert req.method == urllib_req.get_method()
                assert 'X-Test' in req.headers
                assert 'Cookie' in req.headers
                assert req.extensions.get('timeout') == 2

            with pytest.raises(AssertionError):
                ydl.urlopen(None)

    def test_extract_basic_auth(self):
        """Credentials embedded in the URL end up in a Basic Authorization header."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('http://user:pass@foo.bar'))
            assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'

    def test_sanitize_url(self):
        """The mistyped 'httpss' scheme is rewritten to 'https'."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('httpss://foo.bar'))
            assert res.request.url == 'https://foo.bar'

    def test_file_urls_error(self):
        """Opening a file:// URL with default settings raises an explanatory RequestError."""
        # use urllib handler
        with FakeYDL() as ydl:
            with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
                ydl.urlopen('file://')

    def test_legacy_server_connect_error(self):
        """Selected SSL errors gain a --legacy-server-connect hint; other SSL errors pass through."""
        with FakeRHYDL() as ydl:
            for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
                    ydl.urlopen(f'ssl://{error}')

            with pytest.raises(SSLError, match='testerror'):
                ydl.urlopen('ssl://testerror')

    @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
        ('http', '__noproxy__', None),
        ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
        ('https', 'example.com', 'http://example.com'),
        ('https', '//example.com', 'http://example.com'),
        ('https', 'socks5://example.com', 'socks5h://example.com'),
        ('http', 'socks://example.com', 'socks4://example.com'),
        ('http', 'socks4://example.com', 'socks4://example.com'),
        ('unrelated', '/bad/proxy', '/bad/proxy'),  # clean_proxies should ignore bad proxies
    ])
    def test_clean_proxy(self, proxy_key, proxy_url, expected):
        """Proxy values are normalised both per request and when taken from the environment."""
        # proxies should be cleaned in urlopen()
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
            assert req.proxies[proxy_key] == expected

        # and should also be cleaned when building the handler
        env_key = f'{proxy_key.upper()}_PROXY'
        old_env_proxy = os.environ.get(env_key)
        try:
            os.environ[env_key] = proxy_url  # here the environment is the only source of the proxy
            with FakeYDL() as ydl:
                rh = self.build_handler(ydl)
                assert rh.proxies[proxy_key] == expected
        finally:
            # Restore or remove the variable so it cannot leak into later tests
            self._restore_env(env_key, old_env_proxy)

    def test_clean_proxy_header(self):
        """The 'ytdl-request-proxy' header is removed and turned into an 'all' proxy."""
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
            assert 'ytdl-request-proxy' not in req.headers
            assert req.proxies == {'all': 'http://foo.bar'}

        with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'ytdl-request-proxy' not in rh.headers
            assert rh.proxies == {'all': 'http://foo.bar'}

    def test_clean_header(self):
        """The 'Youtubedl-no-compression' header is replaced by 'Accept-Encoding: identity'."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
            assert 'Youtubedl-no-compression' not in res.request.headers
            assert res.request.headers.get('Accept-Encoding') == 'identity'

        with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'Youtubedl-no-compression' not in rh.headers
            assert rh.headers.get('Accept-Encoding') == 'identity'

    def test_build_handler_params(self):
        """YoutubeDL parameters are reflected in the attributes of the built handler."""
        with FakeYDL({
            'http_headers': {'test': 'testtest'},
            'socket_timeout': 2,
            'proxy': 'http://127.0.0.1:8080',
            'source_address': '127.0.0.45',
            'debug_printtraffic': True,
            'compat_opts': ['no-certifi'],
            'nocheckcertificate': True,
            'legacyserverconnect': True,
        }) as ydl:
            rh = self.build_handler(ydl)
            assert rh.headers.get('test') == 'testtest'
            assert 'Accept' in rh.headers  # ensure std_headers are still there
            assert rh.timeout == 2
            assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
            assert rh.source_address == '127.0.0.45'
            assert rh.verbose is True
            assert rh.prefer_system_certs is True
            assert rh.verify is False
            assert rh.legacy_ssl_support is True

    @pytest.mark.parametrize('ydl_params', [
        {'client_certificate': 'fakecert.crt'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
        {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
    ])
    def test_client_certificate(self, ydl_params):
        """Client-certificate parameters are handed to the handler unchanged."""
        with FakeYDL(ydl_params) as ydl:
            rh = self.build_handler(ydl)
            assert rh._client_cert == ydl_params  # XXX: Too bound to implementation

    def test_urllib_file_urls(self):
        """'enable_file_urls' is forwarded to the urllib handler."""
        with FakeYDL({'enable_file_urls': False}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is False

        with FakeYDL({'enable_file_urls': True}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is True
1237
1238
class TestRequest:
    """Tests for the attribute handling of the Request class."""

    def test_query(self):
        """`query` is merged into the URL's query string on creation and on update()."""
        req = Request('http://example.com?q=something', query={'v': 'xyz'})
        assert req.url == 'http://example.com?q=something&v=xyz'

        req.update(query={'v': '123'})
        assert req.url == 'http://example.com?q=something&v=123'

        req.update(url='http://example.com', query={'v': 'xyz'})
        assert req.url == 'http://example.com?v=xyz'

    def test_method(self):
        """The method follows the presence of data until it is set explicitly."""
        req = Request('http://example.com')
        assert req.method == 'GET'

        # Implicit method: POST with data, GET without
        req.data = b'test'
        assert req.method == 'POST'
        req.data = None
        assert req.method == 'GET'

        # An explicit method sticks regardless of data
        req.data = b'test2'
        req.method = 'PUT'
        assert req.method == 'PUT'
        req.data = None
        assert req.method == 'PUT'

        with pytest.raises(TypeError):
            req.method = 1

    def test_request_helpers(self):
        """HEADRequest and PUTRequest preset their method."""
        head_req = HEADRequest('http://example.com')
        put_req = PUTRequest('http://example.com')
        assert head_req.method == 'HEAD'
        assert put_req.method == 'PUT'

    def test_headers(self):
        """Headers are kept in an HTTPHeaderDict; plain dicts are converted on assignment."""
        req = Request('http://example.com', headers={'tesT': 'test'})
        assert req.headers == HTTPHeaderDict({'test': 'test'})
        req.update(headers={'teSt2': 'test2'})
        assert req.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})

        new_headers = HTTPHeaderDict({'test': 'test'})
        req.headers = new_headers
        assert req.headers == HTTPHeaderDict({'test': 'test'})
        assert req.headers is new_headers

        # test converts dict to case insensitive dict
        new_headers = {'test2': 'test2'}
        req.headers = new_headers
        assert isinstance(req.headers, HTTPHeaderDict)
        assert req.headers is not new_headers

        with pytest.raises(TypeError):
            req.headers = None

    def test_data_type(self):
        """`data` accepts bytes, iterables of bytes and file-like objects, but not str or dict."""
        req = Request('http://example.com')
        assert req.data is None

        # test bytes is allowed
        req.data = b'test'
        assert req.data == b'test'

        # test iterable of bytes is allowed
        chunks = [b'test', b'test2']
        req.data = chunks
        assert req.data == chunks

        # test file-like object is allowed
        stream = io.BytesIO(b'test')
        req.data = stream
        assert req.data == stream

        # common mistake: test str not allowed
        with pytest.raises(TypeError):
            req.data = 'test'
        assert req.data != 'test'

        # common mistake: test dict is not allowed
        with pytest.raises(TypeError):
            req.data = {'test': 'test'}
        assert req.data != {'test': 'test'}

    def test_content_length_header(self):
        """Content-Length is dropped when data changes or when there is no data."""
        req = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
        assert req.headers.get('Content-Length') == '0'

        req.data = b'test'
        assert 'Content-Length' not in req.headers

        req = Request('http://example.com', headers={'Content-Length': '10'})
        assert 'Content-Length' not in req.headers

    def test_content_type_header(self):
        """Content-Type survives data changes, is removed with the data and defaults for new data."""
        req = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
        assert req.headers.get('Content-Type') == 'test'

        req.data = b'test2'
        assert req.headers.get('Content-Type') == 'test'

        req.data = None
        assert 'Content-Type' not in req.headers

        req.data = b'test3'
        assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_update_req(self):
        """update(data=b'') turns a data-less GET into a POST with a default Content-Type."""
        req = Request('http://example.com')
        assert req.data is None
        assert req.method == 'GET'
        assert 'Content-Type' not in req.headers

        # Test that zero-byte payloads will be sent
        req.update(data=b'')
        assert req.data == b''
        assert req.method == 'POST'
        assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_proxies(self):
        """Proxies passed to the constructor are exposed unchanged."""
        req = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'})
        assert req.proxies == {'http': 'http://127.0.0.1:8080'}

    def test_extensions(self):
        """Extensions passed to the constructor are exposed unchanged."""
        req = Request(url='http://example.com', extensions={'timeout': 2})
        assert req.extensions == {'timeout': 2}

    def test_copy(self):
        """copy() duplicates headers, proxies and the extensions dict, but shares data."""
        req = Request(
            url='http://example.com',
            extensions={'cookiejar': CookieJar()},
            headers={'Accept-Encoding': 'br'},
            proxies={'http': 'http://127.0.0.1'},
            data=[b'123']
        )
        req_copy = req.copy()
        assert req_copy is not req
        assert req_copy.url == req.url

        assert req_copy.headers == req.headers
        assert req_copy.headers is not req.headers

        assert req_copy.proxies == req.proxies
        assert req_copy.proxies is not req.proxies

        # Data is not able to be copied
        assert req_copy.data == req.data
        assert req_copy.data is req.data

        # Shallow copy extensions
        assert req_copy.extensions is not req.extensions
        assert req_copy.extensions['cookiejar'] == req.extensions['cookiejar']

        # Subclasses are copied by default
        class AnotherRequest(Request):
            pass

        req = AnotherRequest(url='http://127.0.0.1')
        assert isinstance(req.copy(), AnotherRequest)

    def test_url(self):
        """URLs are normalised on assignment; a non-string URL raises TypeError."""
        req = Request(url='https://фtest.example.com/ some spaceв?ä=c',)
        assert req.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'

        scheme_less = Request(url='//example.com')
        assert scheme_less.url == 'http://example.com'

        with pytest.raises(TypeError):
            Request(url='https://').url = None
1391
1392
class TestResponse:
    """Tests for the Response wrapper: reason fallback, headers and deprecated aliases."""

    @pytest.mark.parametrize('reason,status,expected', [
        ('custom', 200, 'custom'),
        (None, 404, 'Not Found'),  # fallback status
        ('', 403, 'Forbidden'),
        (None, 999, None)
    ])
    def test_reason(self, reason, status, expected):
        """A missing or empty reason falls back to the phrase for the status code, if any."""
        body = io.BytesIO(b'')
        res = Response(body, url='test://', headers={}, status=status, reason=reason)
        assert res.reason == expected

    def test_headers(self):
        """Repeated headers are all kept and header lookup ignores case."""
        headers = Message()
        for name, value in (('Test', 'test'), ('Test', 'test2'), ('content-encoding', 'br')):
            headers.add_header(name, value)
        res = Response(io.BytesIO(b''), headers=headers, url='test://')
        assert res.headers.get_all('test') == ['test', 'test2']
        assert 'Content-Encoding' in res.headers

    def test_get_header(self):
        """get_header() joins repeated values, except Set-Cookie where the first one is returned."""
        headers = Message()
        for name, value in (
            ('Set-Cookie', 'cookie1'),
            ('Set-cookie', 'cookie2'),
            ('Test', 'test'),
            ('Test', 'test2'),
        ):
            headers.add_header(name, value)
        res = Response(io.BytesIO(b''), headers=headers, url='test://')
        assert res.get_header('test') == 'test, test2'
        assert res.get_header('set-Cookie') == 'cookie1'
        assert res.get_header('notexist', 'default') == 'default'

    def test_compat(self):
        """The deprecated urllib-style accessors agree with the current attributes."""
        res = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=DeprecationWarning)
            assert res.code == res.getcode() == res.status
            assert res.geturl() == res.url
            assert res.info() is res.headers
            assert res.getheader('test') == res.get_header('test')