]> jfr.im git - yt-dlp.git/blob - test/test_networking.py
[networking] Ignore invalid proxies in env (#7704)
[yt-dlp.git] / test / test_networking.py
1 #!/usr/bin/env python3
2
3 # Allow direct execution
4 import os
5 import sys
6
7 import pytest
8
9 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
11 import functools
12 import gzip
13 import http.client
14 import http.cookiejar
15 import http.server
16 import inspect
17 import io
18 import pathlib
19 import random
20 import ssl
21 import tempfile
22 import threading
23 import time
24 import urllib.error
25 import urllib.request
26 import warnings
27 import zlib
28 from email.message import Message
29 from http.cookiejar import CookieJar
30
31 from test.helper import FakeYDL, http_server_port
32 from yt_dlp.dependencies import brotli
33 from yt_dlp.networking import (
34 HEADRequest,
35 PUTRequest,
36 Request,
37 RequestDirector,
38 RequestHandler,
39 Response,
40 )
41 from yt_dlp.networking._urllib import UrllibRH
42 from yt_dlp.networking.common import _REQUEST_HANDLERS
43 from yt_dlp.networking.exceptions import (
44 CertificateVerifyError,
45 HTTPError,
46 IncompleteRead,
47 NoSupportingHandlers,
48 RequestError,
49 SSLError,
50 TransportError,
51 UnsupportedRequest,
52 )
53 from yt_dlp.utils._utils import _YDLLogger as FakeLogger
54 from yt_dlp.utils.networking import HTTPHeaderDict
55
56 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
57
58
59 def _build_proxy_handler(name):
60 class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
61 proxy_name = name
62
63 def log_message(self, format, *args):
64 pass
65
66 def do_GET(self):
67 self.send_response(200)
68 self.send_header('Content-Type', 'text/plain; charset=utf-8')
69 self.end_headers()
70 self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode())
71 return HTTPTestRequestHandler
72
73
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    """Request handler backing the local HTTP/HTTPS servers used by the tests.

    The request path selects a canned behaviour: echoing the request headers,
    redirecting, compressing the body, sending a truncated body, delaying the
    response, and so on. Unknown paths get a 404.
    """
    protocol_version = 'HTTP/1.1'

    # Page served by the plain HTML endpoints and compressed by the encoding endpoints
    _VIDEO_PAGE = b'<html><video src="/vid.mp4" /></html>'

    def log_message(self, format, *args):
        # Keep the test output free of per-request log lines
        pass

    def _send_payload(self, payload, content_type='text/html; charset=utf-8', status=200):
        """Send a full response: status line, Content-Type, Content-Length and *payload*."""
        self.send_response(status)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _send_redirect(self, status, location):
        """Send a body-less response with the given *status* and Location header."""
        self.send_response(status)
        self.send_header('Location', location)
        self.send_header('Content-Length', '0')
        self.end_headers()

    @staticmethod
    def _gzip(data):
        """Return *data* compressed as a gzip stream."""
        buf = io.BytesIO()
        with gzip.GzipFile(fileobj=buf, mode='wb') as f:
            f.write(data)
        return buf.getvalue()

    def _headers(self):
        """Echo the received request headers back as the response body."""
        self._send_payload(str(self.headers).encode(), content_type='application/json')

    def _redirect(self):
        """Redirect to /method using the status code embedded in a /redirect_<code> path."""
        self._send_redirect(int(self.path[len('/redirect_'):]), '/method')

    def _method(self, method, payload=None):
        """Report the request method in a 'Method' response header, with *payload* as the body."""
        self.send_response(200)
        self.send_header('Content-Length', str(len(payload or '')))
        self.send_header('Method', method)
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def _status(self, status):
        """Send a small HTML page with the given *status* code."""
        self._send_payload(f'<html>{status} NOT FOUND</html>'.encode(), status=int(status))

    def _read_data(self):
        """Read the request body announced by Content-Length.

        Returns empty bytes when the request has no Content-Length header, so
        callers can always concatenate the result with other bytes.
        """
        if 'Content-Length' in self.headers:
            return self.rfile.read(int(self.headers['Content-Length']))
        return b''

    def do_POST(self):
        # Body followed by the request headers, echoed back by /method
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('POST', data)
        elif self.path.startswith('/headers'):
            self._headers()
        else:
            self._status(404)

    def do_HEAD(self):
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('HEAD')
        else:
            self._status(404)

    def do_PUT(self):
        # Body followed by the request headers, echoed back by /method
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('PUT', data)
        else:
            self._status(404)

    def do_GET(self):
        if self.path in ('/video.html', '/%E4%B8%AD%E6%96%87.html', '/%c7%9f'):
            self._send_payload(self._VIDEO_PAGE)
        elif self.path == '/vid.mp4':
            self._send_payload(b'\x00\x00\x00\x00\x20\x66\x74[video]', content_type='video/mp4')
        elif self.path.startswith('/redirect_loop'):
            # Must be checked before the generic '/redirect_' prefix below
            self._send_redirect(301, self.path)
        elif self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('GET', str(self.headers).encode())
        elif self.path.startswith('/headers'):
            self._headers()
        elif self.path.startswith('/308-to-headers'):
            self._send_redirect(308, '/headers')
        elif self.path == '/trailing_garbage':
            # Valid gzip stream followed by bytes that are not part of it
            compressed = self._gzip(self._VIDEO_PAGE) + b'trailing garbage'
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.send_header('Content-Encoding', 'gzip')
            self.send_header('Content-Length', str(len(compressed)))
            self.end_headers()
            self.wfile.write(compressed)
        elif self.path == '/302-non-ascii-redirect':
            # The Location header deliberately carries raw (non percent-encoded) non-ASCII characters
            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
            self._send_redirect(301, new_url)
        elif self.path == '/content-encoding':
            # The client picks the encodings to apply, in order, via the 'ytdl-encoding' request header
            encodings = self.headers.get('ytdl-encoding', '')
            payload = self._VIDEO_PAGE
            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
                if encoding == 'br' and brotli:
                    payload = brotli.compress(payload)
                elif encoding == 'gzip':
                    payload = self._gzip(payload)
                elif encoding == 'deflate':
                    payload = zlib.compress(payload)
                elif encoding == 'unsupported':
                    payload = b'raw'
                    break
                else:
                    self._status(415)
                    return
            self.send_response(200)
            self.send_header('Content-Encoding', encodings)
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)
        elif self.path.startswith('/gen_'):
            # Respond with the status code embedded in the path
            self._send_payload(b'<html></html>', status=int(self.path[len('/gen_'):]))
        elif self.path.startswith('/incompleteread'):
            # Announce far more data than is actually written
            payload = b'<html></html>'
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.send_header('Content-Length', '234234')
            self.end_headers()
            self.wfile.write(payload)
            self.finish()
        elif self.path.startswith('/timeout_'):
            # Delay the response by the number of seconds embedded in the path
            time.sleep(int(self.path[len('/timeout_'):]))
            self._headers()
        elif self.path == '/source_address':
            # Report the address the client connected from
            self._send_payload(str(self.client_address[0]).encode())
            self.finish()
        else:
            self._status(404)

    def send_header(self, keyword, value):
        """
        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
        This is against what is defined in RFC 3986, however we need to test we support this
        since some sites incorrectly do this.
        """
        if keyword.lower() == 'connection':
            return super().send_header(keyword, value)

        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []

        # str.encode() defaults to UTF-8, so non-ASCII header values are written as-is
        self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
271
272
def validate_and_send(rh, req):
    """Validate *req* against the request handler *rh*, then send it and return the response."""
    rh.validate(req)
    response = rh.send(req)
    return response
276
277
class TestRequestHandlerBase:
    """Base class that runs a local HTTP and HTTPS test server for the test class.

    After ``setup_class`` the ports are available as ``http_port`` and
    ``https_port``. Both servers are stopped again in ``teardown_class``.
    """

    @classmethod
    def setup_class(cls):
        # HTTP server
        cls.http_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        cls.http_port = http_server_port(cls.http_httpd)
        cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever)
        # The servers are stopped in teardown_class; stopping them after each
        # individual test is still open, see:
        # https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
        cls.http_server_thread.daemon = True
        cls.http_server_thread.start()

        # HTTPS server
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        cls.https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
        cls.https_port = http_server_port(cls.https_httpd)
        cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever)
        cls.https_server_thread.daemon = True
        cls.https_server_thread.start()

    @classmethod
    def teardown_class(cls):
        # Stop the serve_forever() loops and release the listening sockets so
        # they do not accumulate across test classes.
        for attr in ('http_httpd', 'https_httpd'):
            httpd = getattr(cls, attr, None)
            if httpd is not None:
                httpd.shutdown()
                httpd.server_close()
301
302
@pytest.fixture
def handler(request):
    """Resolve the parametrized request handler key into a handler factory.

    ``request.param`` is either a RequestHandler subclass, which is used
    directly, or a key looked up in ``_REQUEST_HANDLERS``. The test is skipped
    for any other key. The returned factory is the handler class with
    ``logger=FakeLogger`` pre-bound.
    """
    rh_key = request.param
    given_handler_class = inspect.isclass(rh_key) and issubclass(rh_key, RequestHandler)
    if not given_handler_class and rh_key not in _REQUEST_HANDLERS:
        pytest.skip(f'{rh_key} request handler is not available')

    handler_cls = rh_key if given_handler_class else _REQUEST_HANDLERS[rh_key]
    return functools.partial(handler_cls, logger=FakeLogger)
314
315
class TestHTTPRequestHandler(TestRequestHandlerBase):
    """HTTP behaviour tests run against the local test servers of the base class."""

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert(self, handler):
        # With default settings the test server's certificate is rejected
        with handler() as rh:
            with pytest.raises(CertificateVerifyError):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

        with handler(verify=False) as rh:
            r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
            assert r.status == 200
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_ssl_error(self, handler):
        # HTTPS server with too old TLS version
        # XXX: is there a better way to test this than to create a new server?
        # NOTE: no certificate is loaded into this server's SSL context
        https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
        https_port = http_server_port(https_httpd)
        https_server_thread = threading.Thread(target=https_httpd.serve_forever)
        https_server_thread.daemon = True
        https_server_thread.start()

        try:
            with handler(verify=False) as rh:
                with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
                    validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
                assert not issubclass(exc_info.type, CertificateVerifyError)
        finally:
            # This server only exists for this test; do not leave it listening
            https_httpd.shutdown()
            https_httpd.server_close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_percent_encode(self, handler):
        with handler() as rh:
            # Unicode characters should be encoded with uppercase percent-encoding
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
            assert res.status == 200
            res.close()
            # don't normalize existing percent encodings
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
            assert res.status == 200
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_unicode_path_redirection(self, handler):
        with handler() as rh:
            r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
            assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_raise_http_error(self, handler):
        with handler() as rh:
            for bad_status in (400, 500, 599, 302):
                with pytest.raises(HTTPError):
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_{bad_status}'))

            # Should not raise an error
            validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')).close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_response_url(self, handler):
        with handler() as rh:
            # Response url should be that of the last url in redirect chain
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
            assert res.url == f'http://127.0.0.1:{self.http_port}/method'
            res.close()
            res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
            assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
            res2.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_redirect(self, handler):
        with handler() as rh:
            def do_req(redirect_status, method, assert_no_content=False):
                """Request /redirect_<status> and return (body data echoed by /method, final method)."""
                data = b'testdata' if method in ('POST', 'PUT') else None
                res = validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))

                headers = b''
                data_sent = b''
                if data is not None:
                    data_sent += res.read(len(data))
                    if data_sent != data:
                        # The body was not forwarded; what was read is the start of the echoed headers
                        headers += data_sent
                        data_sent = b''

                headers += res.read()

                if assert_no_content or data is None:
                    assert b'Content-Type' not in headers
                    assert b'Content-Length' not in headers
                else:
                    assert b'Content-Type' in headers
                    assert b'Content-Length' in headers

                return data_sent.decode(), res.headers.get('method', '')

            # A 303 must either use GET or HEAD for subsequent request
            assert do_req(303, 'POST', True) == ('', 'GET')
            assert do_req(303, 'HEAD') == ('', 'HEAD')

            assert do_req(303, 'PUT', True) == ('', 'GET')

            # 301 and 302 turn POST only into a GET
            assert do_req(301, 'POST', True) == ('', 'GET')
            assert do_req(301, 'HEAD') == ('', 'HEAD')
            assert do_req(302, 'POST', True) == ('', 'GET')
            assert do_req(302, 'HEAD') == ('', 'HEAD')

            assert do_req(301, 'PUT') == ('testdata', 'PUT')
            assert do_req(302, 'PUT') == ('testdata', 'PUT')

            # 307 and 308 should not change method
            for m in ('POST', 'PUT'):
                assert do_req(307, m) == ('testdata', m)
                assert do_req(308, m) == ('testdata', m)

            assert do_req(307, 'HEAD') == ('', 'HEAD')
            assert do_req(308, 'HEAD') == ('', 'HEAD')

            # These should not redirect and instead raise an HTTPError
            for code in (300, 304, 305, 306):
                with pytest.raises(HTTPError):
                    do_req(code, 'GET')

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_request_cookie_header(self, handler):
        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
        with handler() as rh:
            # Specified Cookie header should be used
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' in res

            # Specified Cookie header should be removed on any redirect
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/308-to-headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' not in res

        # Specified Cookie header should override global cookiejar for that request
        cookiejar = http.cookiejar.CookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp', port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
            path_specified=True, secure=False, expires=None, discard=False, comment=None,
            comment_url=None, rest={}))

        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read()
            assert b'Cookie: test=ytdlp' not in data
            assert b'Cookie: test=test' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_redirect_loop(self, handler):
        with handler() as rh:
            with pytest.raises(HTTPError, match='redirect loop'):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_incompleteread(self, handler):
        with handler(timeout=2) as rh:
            with pytest.raises(IncompleteRead):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_cookies(self, handler):
        cookiejar = http.cookiejar.CookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp', port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/headers',
            path_specified=True, secure=False, expires=None, discard=False, comment=None,
            comment_url=None, rest={}))

        # Handler-wide cookiejar
        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Cookie: test=ytdlp' in data

        # Per request
        with handler() as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
            assert b'Cookie: test=ytdlp' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_headers(self, handler):

        with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
            # Global Headers
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Test1: test' in data

            # Per request headers, merged with global
            data = validate_and_send(rh, Request(
                f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read()
            assert b'Test1: test' in data
            assert b'Test2: changed' in data
            assert b'Test2: test2' not in data
            assert b'Test3: test3' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_timeout(self, handler):
        with handler() as rh:
            # Default timeout is 20 seconds, so this should go through
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3'))

        with handler(timeout=0.5) as rh:
            with pytest.raises(TransportError):
                validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))

            # Per request timeout, should override handler timeout
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_source_address(self, handler):
        # The server echoes back the address the connection came from
        source_address = f'127.0.0.{random.randint(5, 255)}'
        with handler(source_address=source_address) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
            assert source_address == data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_gzip_trailing_garbage(self, handler):
        with handler() as rh:
            data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
            assert data == '<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
    def test_brotli(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'br'}))
            assert res.headers.get('Content-Encoding') == 'br'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_deflate(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'deflate'}))
            assert res.headers.get('Content-Encoding') == 'deflate'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_gzip(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'gzip'}))
            assert res.headers.get('Content-Encoding') == 'gzip'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_multiple_encodings(self, handler):
        with handler() as rh:
            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
                res = validate_and_send(
                    rh, Request(
                        f'http://127.0.0.1:{self.http_port}/content-encoding',
                        headers={'ytdl-encoding': pair}))
                assert res.headers.get('Content-Encoding') == pair
                assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_unsupported_encoding(self, handler):
        # A body with an unknown Content-Encoding is expected to be returned undecoded
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'unsupported'}))
            assert res.headers.get('Content-Encoding') == 'unsupported'
            assert res.read() == b'raw'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_read(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
            assert res.readable()
            # Partial reads must continue where the previous read stopped
            assert res.read(1) == b'H'
            assert res.read(3) == b'ost'
608
609
class TestHTTPProxy(TestRequestHandlerBase):
    """Proxy tests using two local fake proxy servers labelled 'normal' and 'geo'."""

    @classmethod
    def setup_class(cls):
        super().setup_class()

        # HTTP Proxy server
        cls.proxy = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('normal'))
        cls.proxy_port = http_server_port(cls.proxy)
        cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever, daemon=True)
        cls.proxy_thread.start()

        # Geo proxy server
        cls.geo_proxy = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('geo'))
        cls.geo_port = http_server_port(cls.geo_proxy)
        cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever, daemon=True)
        cls.geo_proxy_thread.start()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_proxy(self, handler):
        # Covers: global http proxy, per request http proxy,
        # and a per request setting that disables the proxy
        http_proxy = f'http://127.0.0.1:{self.proxy_port}'
        geo_proxy = f'http://127.0.0.1:{self.geo_port}'
        url = 'http://foo.com/bar'

        # Global HTTP proxy
        with handler(proxies={'http': http_proxy}) as rh:
            response = validate_and_send(rh, Request(url))
            res = response.read().decode()
            assert res == f'normal: {url}'

            # Per request proxy overrides global
            response = validate_and_send(rh, Request(url, proxies={'http': geo_proxy}))
            res = response.read().decode()
            assert res == f'geo: {url}'

            # and setting to None disables all proxies for that request
            real_url = f'http://127.0.0.1:{self.http_port}/headers'
            response = validate_and_send(rh, Request(real_url, proxies={'http': None}))
            res = response.read().decode()
            assert res != f'normal: {real_url}'
            assert 'Accept' in res

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_noproxy(self, handler):
        target = f'http://127.0.0.1:{self.http_port}/headers'
        with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
            # NO_PROXY
            for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
                response = validate_and_send(rh, Request(target, proxies={'no': no_proxy}))
                nop_response = response.read().decode('utf-8')
                # The header echo of the real test server contains 'Accept'
                assert 'Accept' in nop_response

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_allproxy(self, handler):
        url = 'http://foo.com/bar'
        all_proxy = f'http://127.0.0.1:{self.proxy_port}'
        with handler() as rh:
            raw = validate_and_send(rh, Request(url, proxies={'all': all_proxy})).read()
            response = raw.decode('utf-8')
            assert response == f'normal: {url}'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_proxy_with_idn(self, handler):
        proxies = {
            'http': f'http://127.0.0.1:{self.proxy_port}',
        }
        with handler(proxies=proxies) as rh:
            url = 'http://中文.tw/'
            response = rh.send(Request(url)).read().decode()
            # b'xn--fiq228c' is '中文'.encode('idna')
            assert response == 'normal: http://xn--fiq228c.tw/'
683
684
class TestClientCertificate:
    """Client certificate tests against an HTTPS server that requires a client certificate."""

    @classmethod
    def setup_class(cls):
        server_cert = os.path.join(TEST_DIR, 'testcert.pem')
        cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
        ca_cert = os.path.join(cls.certdir, 'ca.crt')

        cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)

        # Server-side context: clients must present a certificate verifiable against ca.crt
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.verify_mode = ssl.CERT_REQUIRED
        sslctx.load_verify_locations(cafile=ca_cert)
        sslctx.load_cert_chain(server_cert, None)

        cls.httpd.socket = sslctx.wrap_socket(cls.httpd.socket, server_side=True)
        cls.port = http_server_port(cls.httpd)
        cls.server_thread = threading.Thread(target=cls.httpd.serve_forever, daemon=True)
        cls.server_thread.start()

    def _run_test(self, handler, **handler_kwargs):
        """Fetch /video.html from the server with a handler built from *handler_kwargs*."""
        # Disable client-side validation of unacceptable self-signed testcert.pem
        # The test is of a check on the server side, so unaffected
        with handler(verify=False, **handler_kwargs) as rh:
            response = validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html'))
            response.read().decode()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_combined_nopass(self, handler):
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_nocombined_nopass(self, handler):
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'client.key'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_combined_pass(self, handler):
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_nocombined_pass(self, handler):
        client_cert = {
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)
739
740
class TestUrllibRequestHandler(TestRequestHandlerBase):
    """Tests for behaviour specific to the urllib-based request handler."""

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_file_urls(self, handler):
        # See https://github.com/ytdl-org/youtube-dl/issues/8227
        # delete=False: the file has to outlive the `with` block so it can be read back by URL
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tf.write(b'foobar')

        try:
            req = Request(pathlib.Path(tf.name).as_uri())
            with handler() as rh:
                with pytest.raises(UnsupportedRequest):
                    rh.validate(req)

                # Test that urllib never loaded FileHandler
                with pytest.raises(TransportError):
                    rh.send(req)

            with handler(enable_file_urls=True) as rh:
                res = validate_and_send(rh, req)
                assert res.read() == b'foobar'
                res.close()
        finally:
            # Remove the temporary file even when an assertion above fails
            os.unlink(tf.name)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_error_returns_content(self, handler):
        # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
        def get_response():
            with handler() as rh:
                # headers url
                try:
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
                except HTTPError as e:
                    return e.response

        # The body must still be readable after the exception object is gone
        assert get_response().read() == b'<html></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert_error_text(self, handler):
        # Check the output of the error message
        with handler() as rh:
            with pytest.raises(
                CertificateVerifyError,
                match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
            ):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('req,match,version_check', [
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
        # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
        (
            Request('http://127.0.0.1', method='GET\n'),
            'method can\'t contain control characters',
            lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
        # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
        (
            Request('http://127.0.0. 1', method='GET'),
            'URL can\'t contain control characters',
            lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
        (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
    ])
    def test_httplib_validation_errors(self, handler, req, match, version_check):
        # version_check returns True for Python versions that lack the validation under test
        if version_check and version_check(sys.version_info):
            pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')

        with handler() as rh:
            with pytest.raises(RequestError, match=match) as exc_info:
                validate_and_send(rh, req)
            # Must be a plain RequestError, not the TransportError subtype
            assert not isinstance(exc_info.value, TransportError)
814
815
def run_validation(handler, error, req, **handler_kwargs):
    """Validate *req* with a handler built from *handler_kwargs*.

    When *error* is truthy it is the exception type that ``validate`` is
    expected to raise; otherwise validation must pass without raising.
    """
    with handler(**handler_kwargs) as rh:
        if not error:
            rh.validate(req)
            return
        with pytest.raises(error):
            rh.validate(req)
823
824
class TestRequestHandlerValidation:
    """Checks of what request handlers accept or reject during validation."""

    class ValidationRH(RequestHandler):
        """Handler that keeps the inherited validation settings; sending always errors."""

        def _send(self, request):
            raise RequestError('test')

    class NoCheckRH(ValidationRH):
        """Handler with every `_SUPPORTED_*` attribute set to None and extensions emptied on check."""

        _SUPPORTED_URL_SCHEMES = None
        _SUPPORTED_PROXY_SCHEMES = None
        _SUPPORTED_FEATURES = None

        def _check_extensions(self, extensions):
            extensions.clear()

    class HTTPSupportedRH(ValidationRH):
        """Handler that only declares support for the `http` URL scheme."""

        _SUPPORTED_URL_SCHEMES = ('http',)

    # Each table entry is (handler, cases). A falsy "fail" value means that
    # validation is expected to succeed, otherwise it is the expected exception.

    URL_SCHEME_TESTS = [
        # scheme, expected to fail, handler kwargs
        ('Urllib', [
            ('http', False, {}),
            ('https', False, {}),
            ('data', False, {}),
            ('ftp', False, {}),
            ('file', UnsupportedRequest, {}),
            ('file', False, {'enable_file_urls': True}),
        ]),
        (NoCheckRH, [('http', False, {})]),
        (ValidationRH, [('http', UnsupportedRequest, {})])
    ]

    PROXY_SCHEME_TESTS = [
        # scheme, expected to fail
        ('Urllib', [
            ('http', False),
            ('https', UnsupportedRequest),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
            ('socks', UnsupportedRequest),
        ]),
        (NoCheckRH, [('http', False)]),
        (HTTPSupportedRH, [('http', UnsupportedRequest)]),
    ]

    PROXY_KEY_TESTS = [
        # key, expected to fail
        ('Urllib', [
            ('all', False),
            ('unrelated', False),
        ]),
        (NoCheckRH, [('all', False)]),
        (HTTPSupportedRH, [('all', UnsupportedRequest)]),
        (HTTPSupportedRH, [('no', UnsupportedRequest)]),
    ]

    EXTENSION_TESTS = [
        # extensions, expected to fail
        ('Urllib', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': CookieJar()}, False),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        (NoCheckRH, [
            ({'cookiejar': 'notacookiejar'}, False),
            ({'somerandom': 'test'}, False),  # but any extension is allowed through
        ]),
    ]

    @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
        (rh, scheme, fail, handler_kwargs)
        for rh, cases in URL_SCHEME_TESTS
        for scheme, fail, handler_kwargs in cases
    ], indirect=['handler'])
    def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
        """Validate a bare `<scheme>://` URL against the expected outcome."""
        request = Request(f'{scheme}://')
        run_validation(handler, fail, request, **(handler_kwargs or {}))

    @pytest.mark.parametrize('handler,fail', [('Urllib', False)], indirect=['handler'])
    def test_no_proxy(self, handler, fail):
        """A `no` proxy entry is validated both on the request and on the handler."""
        no_proxy = '127.0.0.1,github.com'
        run_validation(handler, fail, Request('http://', proxies={'no': no_proxy}))
        run_validation(handler, fail, Request('http://'), proxies={'no': no_proxy})

    @pytest.mark.parametrize('handler,proxy_key,fail', [
        (rh, proxy_key, fail)
        for rh, cases in PROXY_KEY_TESTS
        for proxy_key, fail in cases
    ], indirect=['handler'])
    def test_proxy_key(self, handler, proxy_key, fail):
        """Validate the key of a proxy mapping given per request and per handler."""
        proxy_url = 'http://example.com'
        run_validation(handler, fail, Request('http://', proxies={proxy_key: proxy_url}))
        run_validation(handler, fail, Request('http://'), proxies={proxy_key: proxy_url})

    @pytest.mark.parametrize('handler,scheme,fail', [
        (rh, scheme, fail)
        for rh, cases in PROXY_SCHEME_TESTS
        for scheme, fail in cases
    ], indirect=['handler'])
    def test_proxy_scheme(self, handler, scheme, fail):
        """Validate the scheme of a proxy URL given per request and per handler."""
        proxy_url = f'{scheme}://example.com'
        run_validation(handler, fail, Request('http://', proxies={'http': proxy_url}))
        run_validation(handler, fail, Request('http://'), proxies={'http': proxy_url})

    @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH], indirect=True)
    def test_empty_proxy(self, handler):
        """A proxy value of None must pass validation on the request and on the handler."""
        run_validation(handler, False, Request('http://', proxies={'http': None}))
        run_validation(handler, False, Request('http://'), proxies={'http': None})

    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_invalid_proxy_url(self, handler, proxy_url):
        """Each of the listed proxy URLs must be rejected with UnsupportedRequest."""
        request = Request('http://', proxies={'http': proxy_url})
        run_validation(handler, UnsupportedRequest, request)

    @pytest.mark.parametrize('handler,extensions,fail', [
        (rh, extensions, fail)
        for rh, cases in EXTENSION_TESTS
        for extensions, fail in cases
    ], indirect=['handler'])
    def test_extension(self, handler, extensions, fail):
        """Validate a request carrying the given extensions."""
        request = Request('http://', extensions=extensions)
        run_validation(handler, fail, request)

    def test_invalid_request_type(self):
        """Both `validate` and `send` must raise TypeError for a non-Request argument."""
        validation_rh = self.ValidationRH(logger=FakeLogger())
        for entry_point in (validation_rh.validate, validation_rh.send):
            with pytest.raises(TypeError, match='Expected an instance of Request'):
                entry_point('not a request')
952
953
class FakeResponse(Response):
    """Response with an empty body that keeps a reference to its originating request."""

    def __init__(self, request):
        # XXX: we could make request part of standard response interface
        self.request = request
        empty_body = io.BytesIO(b'')
        super().__init__(fp=empty_body, headers={}, url=request.url)
959
960
class FakeRH(RequestHandler):
    """Handler that answers with a FakeResponse, or raises SSLError for `ssl://` URLs."""

    def _validate(self, request):
        # Deliberately performs no checks
        return None

    def _send(self, request: Request):
        ssl_prefix = 'ssl://'
        if not request.url.startswith(ssl_prefix):
            return FakeResponse(request)
        # Everything after the prefix becomes the error message
        raise SSLError(request.url[len(ssl_prefix):])
970
971
class FakeRHYDL(FakeYDL):
    """FakeYDL whose request director is built from FakeRH only."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        fake_handlers = [FakeRH]
        self._request_director = self.build_request_director(fake_handlers)
976
977
class TestRequestDirector:
    """Tests for adding/removing handlers on a RequestDirector and dispatching through it."""

    def test_handler_operations(self):
        """Handlers are stored under their RH_KEY; re-adding the same key replaces the entry."""
        director = RequestDirector(logger=FakeLogger())
        handler = FakeRH(logger=FakeLogger())
        director.add_handler(handler)
        assert director.handlers.get(FakeRH.RH_KEY) is handler

        # Handler should overwrite
        handler2 = FakeRH(logger=FakeLogger())
        director.add_handler(handler2)
        assert director.handlers.get(FakeRH.RH_KEY) is not handler
        assert director.handlers.get(FakeRH.RH_KEY) is handler2
        assert len(director.handlers) == 1

        class AnotherFakeRH(FakeRH):
            pass
        director.add_handler(AnotherFakeRH(logger=FakeLogger()))
        assert len(director.handlers) == 2
        assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY

        director.handlers.pop(FakeRH.RH_KEY, None)
        assert director.handlers.get(FakeRH.RH_KEY) is None
        assert len(director.handlers) == 1

        # RequestErrors should passthrough
        with pytest.raises(SSLError):
            director.send(Request('ssl://something'))

    def test_send(self):
        """Sending without any handler raises RequestError; with FakeRH a FakeResponse is returned."""
        director = RequestDirector(logger=FakeLogger())
        with pytest.raises(RequestError):
            director.send(Request('any://'))
        director.add_handler(FakeRH(logger=FakeLogger()))
        assert isinstance(director.send(Request('http://')), FakeResponse)

    def test_unsupported_handlers(self):
        """Requests go to a handler that supports them; with none left, NoSupportingHandlers is raised."""
        director = RequestDirector(logger=FakeLogger())
        director.add_handler(FakeRH(logger=FakeLogger()))

        class SupportedRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        # This handler should by default take preference over FakeRH
        director.add_handler(SupportedRH(logger=FakeLogger()))
        assert director.send(Request('http://')).read() == b'supported'
        # SupportedRH only declares http, so this one is answered by FakeRH (empty body)
        assert director.send(Request('any://')).read() == b''

        director.handlers.pop(FakeRH.RH_KEY)
        with pytest.raises(NoSupportingHandlers):
            director.send(Request('any://'))

    def test_unexpected_error(self):
        """A non-RequestError raised by a handler is counted, and is not fatal if another handler succeeds."""
        director = RequestDirector(logger=FakeLogger())

        class UnexpectedRH(FakeRH):
            def _send(self, request: Request):
                raise TypeError('something')

        # Pass a logger instance (not the FakeLogger class), as everywhere else in this file
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
            director.send(Request('any://'))

        director.handlers.clear()
        assert len(director.handlers) == 0

        # Should not be fatal
        director.add_handler(FakeRH(logger=FakeLogger()))
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        assert director.send(Request('any://'))
1051
1052
1053 # XXX: do we want to move this to test_YoutubeDL.py?
class TestYoutubeDLNetworking:
    """Tests of how YoutubeDL parameters and requests are passed on to request handlers."""

    @staticmethod
    def build_handler(ydl, handler: RequestHandler = FakeRH):
        """Build a request director containing only `handler` (a handler class) and return its instance."""
        return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)

    @staticmethod
    def _restore_env(key, previous):
        """Reset environment variable `key` to `previous`; remove it if `previous` is None.

        Without the removal, a value set by a test would stay in the process
        environment whenever the variable was not set before the test.
        """
        if previous is None:
            os.environ.pop(key, None)
        else:
            os.environ[key] = previous

    def test_compat_opener(self):
        """`ydl._opener` is still a urllib OpenerDirector (deprecation warnings silenced)."""
        with FakeYDL() as ydl:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                assert isinstance(ydl._opener, urllib.request.OpenerDirector)

    @pytest.mark.parametrize('proxy,expected', [
        ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
        ('', {'all': '__noproxy__'}),
        (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'})  # env, set https
    ])
    def test_proxy(self, proxy, expected):
        """The `proxy` param overrides HTTP_PROXY from the environment; None falls back to it."""
        old_http_proxy = os.environ.get('HTTP_PROXY')
        try:
            os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081'  # ensure that provided proxies override env
            with FakeYDL({'proxy': proxy}) as ydl:
                assert ydl.proxies == expected
        finally:
            self._restore_env('HTTP_PROXY', old_http_proxy)

    def test_compat_request(self):
        """`urlopen` accepts a URL string and a urllib Request; None is rejected with AssertionError."""
        with FakeRHYDL() as ydl:
            assert ydl.urlopen('test://')
            urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
            urllib_req.add_unredirected_header('Cookie', 'bob=bob')
            urllib_req.timeout = 2
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                req = ydl.urlopen(urllib_req).request
                assert req.url == urllib_req.get_full_url()
                assert req.data == urllib_req.data
                assert req.method == urllib_req.get_method()
                assert 'X-Test' in req.headers
                assert 'Cookie' in req.headers
                assert req.extensions.get('timeout') == 2

            with pytest.raises(AssertionError):
                ydl.urlopen(None)

    def test_extract_basic_auth(self):
        """Credentials embedded in the URL end up in a Basic Authorization header."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('http://user:pass@foo.bar'))
            assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'

    def test_sanitize_url(self):
        """The `httpss://` typo is corrected to `https://` before sending."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('httpss://foo.bar'))
            assert res.request.url == 'https://foo.bar'

    def test_file_urls_error(self):
        """file:// URLs raise a RequestError with an explanatory message by default."""
        # use urllib handler
        with FakeYDL() as ydl:
            with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
                ydl.urlopen('file://')

    def test_legacy_server_connect_error(self):
        """Specific SSL error messages get a --legacy-server-connect hint; others pass unchanged."""
        with FakeRHYDL() as ydl:
            for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
                    ydl.urlopen(f'ssl://{error}')

            with pytest.raises(SSLError, match='testerror'):
                ydl.urlopen('ssl://testerror')

    @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
        ('http', '__noproxy__', None),
        ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
        ('https', 'example.com', 'http://example.com'),
        ('https', '//example.com', 'http://example.com'),
        ('https', 'socks5://example.com', 'socks5h://example.com'),
        ('http', 'socks://example.com', 'socks4://example.com'),
        ('http', 'socks4://example.com', 'socks4://example.com'),
        ('unrelated', '/bad/proxy', '/bad/proxy'),  # clean_proxies should ignore bad proxies
    ])
    def test_clean_proxy(self, proxy_key, proxy_url, expected):
        """Proxy URLs are normalized both for per-request proxies and for proxies taken from the env."""
        # proxies should be cleaned in urlopen()
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
            assert req.proxies[proxy_key] == expected

        # and should also be cleaned when building the handler
        env_key = f'{proxy_key.upper()}_PROXY'
        old_env_proxy = os.environ.get(env_key)
        try:
            os.environ[env_key] = proxy_url  # supply the proxy through the environment this time
            with FakeYDL() as ydl:
                rh = self.build_handler(ydl)
                assert rh.proxies[proxy_key] == expected
        finally:
            self._restore_env(env_key, old_env_proxy)

    def test_clean_proxy_header(self):
        """The `ytdl-request-proxy` header is removed and turned into an `all` proxy."""
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
            assert 'ytdl-request-proxy' not in req.headers
            assert req.proxies == {'all': 'http://foo.bar'}

        with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'ytdl-request-proxy' not in rh.headers
            assert rh.proxies == {'all': 'http://foo.bar'}

    def test_clean_header(self):
        """The `Youtubedl-no-compression` header is replaced by `Accept-Encoding: identity`."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
            assert 'Youtubedl-no-compression' not in res.request.headers
            assert res.request.headers.get('Accept-Encoding') == 'identity'

        with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'Youtubedl-no-compression' not in rh.headers
            assert rh.headers.get('Accept-Encoding') == 'identity'

    def test_build_handler_params(self):
        """YoutubeDL params are mapped onto the corresponding handler attributes."""
        with FakeYDL({
            'http_headers': {'test': 'testtest'},
            'socket_timeout': 2,
            'proxy': 'http://127.0.0.1:8080',
            'source_address': '127.0.0.45',
            'debug_printtraffic': True,
            'compat_opts': ['no-certifi'],
            'nocheckcertificate': True,
            'legacyserverconnect': True,
        }) as ydl:
            rh = self.build_handler(ydl)
            assert rh.headers.get('test') == 'testtest'
            assert 'Accept' in rh.headers  # ensure std_headers are still there
            assert rh.timeout == 2
            assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
            assert rh.source_address == '127.0.0.45'
            assert rh.verbose is True
            assert rh.prefer_system_certs is True
            assert rh.verify is False
            assert rh.legacy_ssl_support is True

    @pytest.mark.parametrize('ydl_params', [
        {'client_certificate': 'fakecert.crt'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
        {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
    ])
    def test_client_certificate(self, ydl_params):
        """Client certificate params are handed to the handler unchanged."""
        with FakeYDL(ydl_params) as ydl:
            rh = self.build_handler(ydl)
            assert rh._client_cert == ydl_params  # XXX: Too bound to implementation

    def test_urllib_file_urls(self):
        """The `enable_file_urls` param is reflected on the urllib handler."""
        with FakeYDL({'enable_file_urls': False}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is False

        with FakeYDL({'enable_file_urls': True}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is True
1216
1217
class TestRequest:
    """Tests for the attributes and helpers of the Request object."""

    def test_query(self):
        """`query` is merged into the URL, replacing parameters with the same name."""
        request = Request('http://example.com?q=something', query={'v': 'xyz'})
        assert request.url == 'http://example.com?q=something&v=xyz'

        request.update(query={'v': '123'})
        assert request.url == 'http://example.com?q=something&v=123'

        request.update(url='http://example.com', query={'v': 'xyz'})
        assert request.url == 'http://example.com?v=xyz'

    def test_method(self):
        """The method follows the presence of data until it is set explicitly."""
        request = Request('http://example.com')
        assert request.method == 'GET'

        # No explicit method yet: GET without data, POST with data
        request.data = b'test'
        assert request.method == 'POST'
        request.data = None
        assert request.method == 'GET'

        # Once set explicitly, the method no longer depends on data
        request.data = b'test2'
        request.method = 'PUT'
        assert request.method == 'PUT'
        request.data = None
        assert request.method == 'PUT'

        with pytest.raises(TypeError):
            request.method = 1

    def test_request_helpers(self):
        """The helper request classes come with their method preset."""
        for helper, method in ((HEADRequest, 'HEAD'), (PUTRequest, 'PUT')):
            assert helper('http://example.com').method == method

    def test_headers(self):
        """Headers are held in an HTTPHeaderDict; plain dicts are converted, None is rejected."""
        request = Request('http://example.com', headers={'tesT': 'test'})
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        request.update(headers={'teSt2': 'test2'})
        assert request.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})

        # an HTTPHeaderDict is stored as the very same object
        header_dict = HTTPHeaderDict({'test': 'test'})
        request.headers = header_dict
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        assert request.headers is header_dict

        # test converts dict to case insensitive dict
        plain_dict = {'test2': 'test2'}
        request.headers = plain_dict
        assert isinstance(request.headers, HTTPHeaderDict)
        assert request.headers is not plain_dict

        with pytest.raises(TypeError):
            request.headers = None

    def test_data_type(self):
        """Bytes, iterables of bytes and file-like objects are valid data; str and dict are not."""
        request = Request('http://example.com')
        assert request.data is None

        # test bytes, an iterable of bytes and a file-like object are allowed
        for accepted in (b'test', [b'test', b'test2'], io.BytesIO(b'test')):
            request.data = accepted
            assert request.data == accepted

        # common mistakes: test str and dict are not allowed
        for rejected in ('test', {'test': 'test'}):
            with pytest.raises(TypeError):
                request.data = rejected
            assert request.data != rejected

    def test_content_length_header(self):
        """Content-Length is dropped when the data changes or when there is no data."""
        with_body = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
        assert with_body.headers.get('Content-Length') == '0'

        with_body.data = b'test'
        assert 'Content-Length' not in with_body.headers

        without_body = Request('http://example.com', headers={'Content-Length': '10'})
        assert 'Content-Length' not in without_body.headers

    def test_content_type_header(self):
        """Content-Type is kept while data is present, dropped without data, then defaulted."""
        request = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
        assert request.headers.get('Content-Type') == 'test'

        request.data = b'test2'
        assert request.headers.get('Content-Type') == 'test'

        request.data = None
        assert 'Content-Type' not in request.headers

        request.data = b'test3'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_update_req(self):
        """`update(data=b'')` must treat the empty payload as real data."""
        request = Request('http://example.com')
        assert request.data is None
        assert request.method == 'GET'
        assert 'Content-Type' not in request.headers

        # Test that zero-byte payloads will be sent
        request.update(data=b'')
        assert request.data == b''
        assert request.method == 'POST'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_proxies(self):
        """Proxies passed to the constructor are readable from `proxies`."""
        proxied = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'})
        assert proxied.proxies == {'http': 'http://127.0.0.1:8080'}

    def test_extensions(self):
        """Extensions passed to the constructor are readable from `extensions`."""
        extended = Request(url='http://example.com', extensions={'timeout': 2})
        assert extended.extensions == {'timeout': 2}

    def test_copy(self):
        """`copy()` duplicates headers, proxies and the extensions dict, but shares the data object."""
        original = Request(
            url='http://example.com',
            extensions={'cookiejar': CookieJar()},
            headers={'Accept-Encoding': 'br'},
            proxies={'http': 'http://127.0.0.1'},
            data=[b'123']
        )
        duplicate = original.copy()
        assert duplicate is not original
        assert duplicate.url == original.url
        assert duplicate.headers == original.headers
        assert duplicate.headers is not original.headers
        assert duplicate.proxies == original.proxies
        assert duplicate.proxies is not original.proxies

        # Data is not able to be copied
        assert duplicate.data == original.data
        assert duplicate.data is original.data

        # Shallow copy extensions
        assert duplicate.extensions is not original.extensions
        assert duplicate.extensions['cookiejar'] == original.extensions['cookiejar']

        # Subclasses are copied by default
        class AnotherRequest(Request):
            pass

        subclassed = AnotherRequest(url='http://127.0.0.1')
        assert isinstance(subclassed.copy(), AnotherRequest)

    def test_url(self):
        """URLs are escaped/IDNA-encoded, scheme-less URLs default to http, None is rejected."""
        request = Request(url='https://фtest.example.com/ some spaceв?ä=c',)
        assert request.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'

        assert Request(url='//example.com').url == 'http://example.com'

        with pytest.raises(TypeError):
            Request(url='https://').url = None
1370
1371
class TestResponse:
    """Tests for the Response object and its urllib-compatibility accessors."""

    @pytest.mark.parametrize('reason,status,expected', [
        ('custom', 200, 'custom'),
        (None, 404, 'Not Found'),  # fallback status
        ('', 403, 'Forbidden'),
        (None, 999, None)
    ])
    def test_reason(self, reason, status, expected):
        """A missing or empty reason falls back to the phrase for the status, if there is one."""
        response = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
        assert response.reason == expected

    def test_headers(self):
        """Repeated headers are all kept and header lookup ignores case."""
        headers = Message()
        for name, value in (('Test', 'test'), ('Test', 'test2'), ('content-encoding', 'br')):
            headers.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=headers, url='test://')
        assert response.headers.get_all('test') == ['test', 'test2']
        assert 'Content-Encoding' in response.headers

    def test_get_header(self):
        """`get_header` joins repeated values, except Set-Cookie where only the first is returned."""
        headers = Message()
        for name, value in (
            ('Set-Cookie', 'cookie1'),
            ('Set-cookie', 'cookie2'),
            ('Test', 'test'),
            ('Test', 'test2'),
        ):
            headers.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=headers, url='test://')
        assert response.get_header('test') == 'test, test2'
        assert response.get_header('set-Cookie') == 'cookie1'
        assert response.get_header('notexist', 'default') == 'default'

    def test_compat(self):
        """The urllib-style accessors agree with the native attributes (deprecation warnings silenced)."""
        response = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=DeprecationWarning)
            assert response.code == response.getcode() == response.status
            assert response.geturl() == response.url
            assert response.info() is response.headers
            assert response.getheader('test') == response.get_header('test')