]> jfr.im git - yt-dlp.git/blame - test/test_networking.py
[test:download] Test for `expected_exception`
[yt-dlp.git] / test / test_networking.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
54007a45 2
83fda3c0
PH
3# Allow direct execution
4import os
5import sys
227bf1a3 6
7import pytest
f8271158 8
83fda3c0
PH
9sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
08916a49 11import gzip
227bf1a3 12import http.client
08916a49 13import http.cookiejar
54007a45 14import http.server
08916a49 15import io
16import pathlib
227bf1a3 17import random
f8271158 18import ssl
08916a49 19import tempfile
f8271158 20import threading
227bf1a3 21import time
08916a49 22import urllib.error
ac668111 23import urllib.request
227bf1a3 24import warnings
daafbf49 25import zlib
227bf1a3 26from email.message import Message
27from http.cookiejar import CookieJar
f8271158 28
227bf1a3 29from test.helper import FakeYDL, http_server_port
6148833f 30from yt_dlp.cookies import YoutubeDLCookieJar
daafbf49 31from yt_dlp.dependencies import brotli
227bf1a3 32from yt_dlp.networking import (
33 HEADRequest,
34 PUTRequest,
35 Request,
36 RequestDirector,
37 RequestHandler,
38 Response,
39)
40from yt_dlp.networking._urllib import UrllibRH
227bf1a3 41from yt_dlp.networking.exceptions import (
42 CertificateVerifyError,
43 HTTPError,
44 IncompleteRead,
45 NoSupportingHandlers,
46 RequestError,
47 SSLError,
48 TransportError,
49 UnsupportedRequest,
50)
51from yt_dlp.utils._utils import _YDLLogger as FakeLogger
52from yt_dlp.utils.networking import HTTPHeaderDict
83fda3c0
PH
53
54TEST_DIR = os.path.dirname(os.path.abspath(__file__))
55
03d8d4df 56
def _build_proxy_handler(name):
    """Create a request handler class for a fake HTTP proxy labelled ``name``.

    Every GET request is answered with ``<name>: <request path>`` so a test
    can tell which proxy served a given request.
    """
    class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
        proxy_name = name

        def log_message(self, format, *args):
            # Keep the test output free of per-request log lines
            pass

        def do_GET(self):
            self.send_response(200)
            self.send_header('Content-Type', 'text/plain; charset=utf-8')
            self.end_headers()
            body = f'{self.proxy_name}: {self.path}'.encode()
            self.wfile.write(body)

    return HTTPTestRequestHandler
70
71
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    """Request handler backing the local HTTP(S) servers used by the networking tests.

    The behaviour is selected by the request path (e.g. ``/headers`` echoes the
    request headers, ``/redirect_<code>`` redirects to ``/method`` with the given
    status, ``/gen_<code>`` replies with the given status, ...).
    """
    protocol_version = 'HTTP/1.1'

    # Body shared by all of the "video page" style endpoints
    _VIDEO_PAGE = b'<html><video src="/vid.mp4" /></html>'

    def log_message(self, format, *args):
        # Keep the test output free of per-request log lines
        pass

    def _send_html(self, payload, status=200):
        """Send ``payload`` as a complete ``text/html`` response with the given status."""
        self.send_response(status)
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _headers(self):
        """Echo the request headers back as the response body."""
        payload = str(self.headers).encode()
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _redirect(self):
        """Redirect to ``/method`` using the status code embedded in ``/redirect_<code>``."""
        self.send_response(int(self.path[len('/redirect_'):]))
        self.send_header('Location', '/method')
        self.send_header('Content-Length', '0')
        self.end_headers()

    def _method(self, method, payload=None):
        """Report the request method in a ``Method`` header and echo ``payload``, if any."""
        self.send_response(200)
        self.send_header('Content-Length', str(len(payload or '')))
        self.send_header('Method', method)
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def _status(self, status):
        """Send a small HTML body with the given status code."""
        self._send_html(f'<html>{status} NOT FOUND</html>'.encode(), int(status))

    def _read_data(self):
        """Return the request body, or ``b''`` when no ``Content-Length`` was sent.

        Always returning bytes keeps ``do_POST``/``do_PUT`` from failing with a
        ``TypeError`` when they concatenate the body with the request headers.
        """
        if 'Content-Length' in self.headers:
            return self.rfile.read(int(self.headers['Content-Length']))
        return b''

    def do_POST(self):
        # The body must be consumed even for redirects so the connection stays usable
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('POST', data)
        elif self.path.startswith('/headers'):
            self._headers()
        else:
            self._status(404)

    def do_HEAD(self):
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('HEAD')
        else:
            self._status(404)

    def do_PUT(self):
        data = self._read_data() + str(self.headers).encode()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('PUT', data)
        else:
            self._status(404)

    def do_GET(self):
        if self.path == '/video.html':
            self._send_html(self._VIDEO_PAGE)
        elif self.path == '/vid.mp4':
            payload = b'\x00\x00\x00\x00\x20\x66\x74[video]'
            self.send_response(200)
            self.send_header('Content-Type', 'video/mp4')
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)
        elif self.path == '/%E4%B8%AD%E6%96%87.html':
            self._send_html(self._VIDEO_PAGE)
        elif self.path == '/%c7%9f':
            self._send_html(self._VIDEO_PAGE)
        elif self.path.startswith('/redirect_loop'):
            # Redirect to the very same path forever
            self.send_response(301)
            self.send_header('Location', self.path)
            self.send_header('Content-Length', '0')
            self.end_headers()
        elif self.path == '/redirect_dotsegments':
            self.send_response(301)
            # redirect to /headers but with dot segments before
            self.send_header('Location', '/a/b/./../../headers')
            self.send_header('Content-Length', '0')
            self.end_headers()
        elif self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('GET', str(self.headers).encode())
        elif self.path.startswith('/headers'):
            self._headers()
        elif self.path.startswith('/308-to-headers'):
            self.send_response(308)
            self.send_header('Location', '/headers')
            self.send_header('Content-Length', '0')
            self.end_headers()
        elif self.path == '/trailing_garbage':
            # Valid gzip stream followed by bytes that are not part of it
            payload = self._VIDEO_PAGE
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.send_header('Content-Encoding', 'gzip')
            buf = io.BytesIO()
            with gzip.GzipFile(fileobj=buf, mode='wb') as f:
                f.write(payload)
            compressed = buf.getvalue() + b'trailing garbage'
            self.send_header('Content-Length', str(len(compressed)))
            self.end_headers()
            self.wfile.write(compressed)
        elif self.path == '/302-non-ascii-redirect':
            # NB: despite the path name, the status sent here is 301.
            # The Location header deliberately contains raw non-ASCII characters
            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
            self.send_response(301)
            self.send_header('Location', new_url)
            self.send_header('Content-Length', '0')
            self.end_headers()
        elif self.path == '/content-encoding':
            # Apply, in order, every encoding listed in the ``ytdl-encoding`` request header
            encodings = self.headers.get('ytdl-encoding', '')
            payload = self._VIDEO_PAGE
            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
                if encoding == 'br' and brotli:
                    payload = brotli.compress(payload)
                elif encoding == 'gzip':
                    buf = io.BytesIO()
                    with gzip.GzipFile(fileobj=buf, mode='wb') as f:
                        f.write(payload)
                    payload = buf.getvalue()
                elif encoding == 'deflate':
                    payload = zlib.compress(payload)
                elif encoding == 'unsupported':
                    payload = b'raw'
                    break
                else:
                    self._status(415)
                    return
            self.send_response(200)
            self.send_header('Content-Encoding', encodings)
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)
        elif self.path.startswith('/gen_'):
            # Reply with whatever status code follows ``/gen_``
            self._send_html(b'<html></html>', int(self.path[len('/gen_'):]))
        elif self.path.startswith('/incompleteread'):
            # Advertise far more data than is actually sent, then end the response
            payload = b'<html></html>'
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.send_header('Content-Length', '234234')
            self.end_headers()
            self.wfile.write(payload)
            self.finish()
        elif self.path.startswith('/timeout_'):
            # Delay the response by the number of seconds following ``/timeout_``
            time.sleep(int(self.path[len('/timeout_'):]))
            self._headers()
        elif self.path == '/source_address':
            # Report the address the client connected from
            self._send_html(str(self.client_address[0]).encode())
            self.finish()
        else:
            self._status(404)

    def send_header(self, keyword, value):
        """
        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
        This is against what is defined in RFC 3986, however we need to test we support this
        since some sites incorrectly do this.
        """
        if keyword.lower() == 'connection':
            return super().send_header(keyword, value)

        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []

        # Encode as UTF-8 here rather than using the base class implementation
        self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
83fda3c0
PH
275
276
def validate_and_send(rh, req):
    """Validate ``req`` with the request handler ``rh``, then send it and return the response."""
    rh.validate(req)
    response = rh.send(req)
    return response
83fda3c0 280
83fda3c0 281
class TestRequestHandlerBase:
    """Base class that starts local HTTP and HTTPS test servers for a test class.

    ``setup_class`` exposes the servers as ``http_httpd``/``https_httpd`` and their
    ports as ``http_port``/``https_port``; ``teardown_class`` stops them again.
    """

    @classmethod
    def setup_class(cls):
        # HTTP server
        cls.http_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        cls.http_port = http_server_port(cls.http_httpd)
        cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever)
        # FIXME: we should probably stop the http server thread after each test
        # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
        cls.http_server_thread.daemon = True
        cls.http_server_thread.start()

        # HTTPS server
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        cls.https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
        cls.https_port = http_server_port(cls.https_httpd)
        cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever)
        cls.https_server_thread.daemon = True
        cls.https_server_thread.start()

    @classmethod
    def teardown_class(cls):
        """Stop the servers started by ``setup_class`` and release their sockets."""
        for server_attr, thread_attr in (
            ('http_httpd', 'http_server_thread'),
            ('https_httpd', 'https_server_thread'),
        ):
            server = getattr(cls, server_attr, None)
            if server is None:
                continue
            # shutdown() makes serve_forever() return; server_close() frees the listening socket
            server.shutdown()
            server.server_close()
            thread = getattr(cls, thread_attr, None)
            if thread is not None:
                # Bounded wait so a stuck server thread cannot hang the test run
                thread.join(timeout=5)
305
306
class TestHTTPRequestHandler(TestRequestHandlerBase):
    """HTTP(S) behaviour tests run against the local test servers for each request handler."""

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert(self, handler):
        # The test certificate is rejected unless verification is disabled
        with handler() as rh:
            with pytest.raises(CertificateVerifyError):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

        with handler(verify=False) as rh:
            r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
            assert r.status == 200
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_ssl_error(self, handler):
        # HTTPS server with too old TLS version
        # XXX: is there a better way to test this than to create a new server?
        https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
        https_port = http_server_port(https_httpd)
        https_server_thread = threading.Thread(target=https_httpd.serve_forever)
        https_server_thread.daemon = True
        https_server_thread.start()

        try:
            with handler(verify=False) as rh:
                with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
                    validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
                assert not issubclass(exc_info.type, CertificateVerifyError)
        finally:
            # Do not leave the one-off server and its thread running after the test
            https_httpd.shutdown()
            https_httpd.server_close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_percent_encode(self, handler):
        with handler() as rh:
            # Unicode characters should be encoded with uppercase percent-encoding
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
            assert res.status == 200
            res.close()
            # don't normalize existing percent encodings
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
            assert res.status == 200
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_remove_dot_segments(self, handler):
        with handler() as rh:
            # This isn't a comprehensive test,
            # but it should be enough to check whether the handler is removing dot segments
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
            assert res.status == 200
            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
            res.close()

            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
            assert res.status == 200
            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_unicode_path_redirection(self, handler):
        # A raw non-ASCII Location header should be followed in percent-encoded form
        with handler() as rh:
            r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
            assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_raise_http_error(self, handler):
        with handler() as rh:
            for bad_status in (400, 500, 599, 302):
                with pytest.raises(HTTPError):
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_{bad_status}'))

            # Should not raise an error
            validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')).close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_response_url(self, handler):
        with handler() as rh:
            # Response url should be that of the last url in redirect chain
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
            assert res.url == f'http://127.0.0.1:{self.http_port}/method'
            res.close()
            res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
            assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
            res2.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_redirect(self, handler):
        with handler() as rh:
            def do_req(redirect_status, method, assert_no_content=False):
                """Follow ``/redirect_<status>`` and return ``(echoed request data, final method)``."""
                data = b'testdata' if method in ('POST', 'PUT') else None
                res = validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))

                # The /method endpoint echoes the request body (if any) followed by the request headers
                headers = b''
                data_sent = b''
                if data is not None:
                    data_sent += res.read(len(data))
                    if data_sent != data:
                        # The body was dropped on redirect, so these bytes are already headers
                        headers += data_sent
                        data_sent = b''

                headers += res.read()

                if assert_no_content or data is None:
                    assert b'Content-Type' not in headers
                    assert b'Content-Length' not in headers
                else:
                    assert b'Content-Type' in headers
                    assert b'Content-Length' in headers

                return data_sent.decode(), res.headers.get('method', '')

            # A 303 must either use GET or HEAD for subsequent request
            assert do_req(303, 'POST', True) == ('', 'GET')
            assert do_req(303, 'HEAD') == ('', 'HEAD')

            assert do_req(303, 'PUT', True) == ('', 'GET')

            # 301 and 302 turn POST only into a GET
            assert do_req(301, 'POST', True) == ('', 'GET')
            assert do_req(301, 'HEAD') == ('', 'HEAD')
            assert do_req(302, 'POST', True) == ('', 'GET')
            assert do_req(302, 'HEAD') == ('', 'HEAD')

            assert do_req(301, 'PUT') == ('testdata', 'PUT')
            assert do_req(302, 'PUT') == ('testdata', 'PUT')

            # 307 and 308 should not change method
            for m in ('POST', 'PUT'):
                assert do_req(307, m) == ('testdata', m)
                assert do_req(308, m) == ('testdata', m)

            assert do_req(307, 'HEAD') == ('', 'HEAD')
            assert do_req(308, 'HEAD') == ('', 'HEAD')

            # These should not redirect and instead raise an HTTPError
            for code in (300, 304, 305, 306):
                with pytest.raises(HTTPError):
                    do_req(code, 'GET')

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_request_cookie_header(self, handler):
        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
        with handler() as rh:
            # Specified Cookie header should be used
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' in res

            # Specified Cookie header should be removed on any redirect
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/308-to-headers',
                    headers={'Cookie': 'test=test'})).read().decode()
            assert 'Cookie: test=test' not in res

        # Specified Cookie header should override global cookiejar for that request
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp', port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
            path_specified=True, secure=False, expires=None, discard=False, comment=None,
            comment_url=None, rest={}))

        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read()
            assert b'Cookie: test=ytdlp' not in data
            assert b'Cookie: test=test' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_redirect_loop(self, handler):
        with handler() as rh:
            with pytest.raises(HTTPError, match='redirect loop'):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_incompleteread(self, handler):
        # The server advertises more data than it sends, so reading must fail
        with handler(timeout=2) as rh:
            with pytest.raises(IncompleteRead):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_cookies(self, handler):
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
            False, '/headers', True, False, None, False, None, None, {}))

        # Handler-wide cookiejar
        with handler(cookiejar=cookiejar) as rh:
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Cookie: test=ytdlp' in data

        # Per request
        with handler() as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
            assert b'Cookie: test=ytdlp' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_headers(self, handler):

        with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
            # Global Headers
            data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            assert b'Test1: test' in data

            # Per request headers, merged with global
            data = validate_and_send(rh, Request(
                f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read()
            assert b'Test1: test' in data
            assert b'Test2: changed' in data
            assert b'Test2: test2' not in data
            assert b'Test3: test3' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_timeout(self, handler):
        with handler() as rh:
            # Default timeout is 20 seconds, so this should go through
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3'))

        with handler(timeout=0.5) as rh:
            with pytest.raises(TransportError):
                validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))

            # Per request timeout, should override handler timeout
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_source_address(self, handler):
        # The server echoes the address the client connected from
        source_address = f'127.0.0.{random.randint(5, 255)}'
        with handler(source_address=source_address) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
            assert source_address == data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_gzip_trailing_garbage(self, handler):
        with handler() as rh:
            data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
            assert data == '<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
    def test_brotli(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'br'}))
            assert res.headers.get('Content-Encoding') == 'br'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_deflate(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'deflate'}))
            assert res.headers.get('Content-Encoding') == 'deflate'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_gzip(self, handler):
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'gzip'}))
            assert res.headers.get('Content-Encoding') == 'gzip'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_multiple_encodings(self, handler):
        with handler() as rh:
            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
                res = validate_and_send(
                    rh, Request(
                        f'http://127.0.0.1:{self.http_port}/content-encoding',
                        headers={'ytdl-encoding': pair}))
                assert res.headers.get('Content-Encoding') == pair
                assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_unsupported_encoding(self, handler):
        # Bodies with an unknown Content-Encoding should be passed through untouched
        with handler() as rh:
            res = validate_and_send(
                rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'unsupported'}))
            assert res.headers.get('Content-Encoding') == 'unsupported'
            assert res.read() == b'raw'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_read(self, handler):
        # Partial reads should continue from where the previous read stopped
        with handler() as rh:
            res = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
            assert res.readable()
            assert res.read(1) == b'H'
            assert res.read(3) == b'ost'
614
615
class TestHTTPProxy(TestRequestHandlerBase):
    """Proxy selection tests using two local fake proxies labelled ``normal`` and ``geo``."""

    @staticmethod
    def _spawn_proxy(label):
        """Start a fake proxy server in a daemon thread; return ``(server, port, thread)``."""
        server = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler(label))
        port = http_server_port(server)
        thread = threading.Thread(target=server.serve_forever)
        thread.daemon = True
        thread.start()
        return server, port, thread

    @classmethod
    def setup_class(cls):
        super().setup_class()
        # HTTP Proxy server
        cls.proxy, cls.proxy_port, cls.proxy_thread = cls._spawn_proxy('normal')

        # Geo proxy server
        cls.geo_proxy, cls.geo_port, cls.geo_proxy_thread = cls._spawn_proxy('geo')

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_proxy(self, handler):
        # Covers: global http proxy, per request http proxy,
        # and a per request setting that disables the proxy
        http_proxy = f'http://127.0.0.1:{self.proxy_port}'
        geo_proxy = f'http://127.0.0.1:{self.geo_port}'
        url = 'http://foo.com/bar'

        # Global HTTP proxy
        with handler(proxies={'http': http_proxy}) as rh:
            body = validate_and_send(rh, Request(url)).read().decode()
            assert body == f'normal: {url}'

            # Per request proxy overrides global
            body = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode()
            assert body == f'geo: {url}'

            # and setting to None disables all proxies for that request
            real_url = f'http://127.0.0.1:{self.http_port}/headers'
            direct_request = Request(real_url, proxies={'http': None})
            body = validate_and_send(rh, direct_request).read().decode()
            assert body != f'normal: {real_url}'
            assert 'Accept' in body

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_noproxy(self, handler):
        target = f'http://127.0.0.1:{self.http_port}/headers'
        with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
            # NO_PROXY
            for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
                request = Request(target, proxies={'no': no_proxy})
                nop_response = validate_and_send(rh, request).read().decode('utf-8')
                assert 'Accept' in nop_response

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_allproxy(self, handler):
        url = 'http://foo.com/bar'
        with handler() as rh:
            request = Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})
            response = validate_and_send(rh, request).read().decode('utf-8')
            assert response == f'normal: {url}'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_proxy_with_idn(self, handler):
        proxies = {
            'http': f'http://127.0.0.1:{self.proxy_port}',
        }
        with handler(proxies=proxies) as rh:
            url = 'http://中文.tw/'
            response = rh.send(Request(url)).read().decode()
            # b'xn--fiq228c' is '中文'.encode('idna')
            assert response == 'normal: http://xn--fiq228c.tw/'
689
690
class TestClientCertificate:
    """Client certificate tests against a local HTTPS server that requires one."""

    @classmethod
    def setup_class(cls):
        cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
        server_cert = os.path.join(TEST_DIR, 'testcert.pem')
        ca_cert = os.path.join(cls.certdir, 'ca.crt')

        cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)

        # The server only accepts clients presenting a certificate signed by the test CA
        context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        context.verify_mode = ssl.CERT_REQUIRED
        context.load_verify_locations(cafile=ca_cert)
        context.load_cert_chain(server_cert, None)
        cls.httpd.socket = context.wrap_socket(cls.httpd.socket, server_side=True)

        cls.port = http_server_port(cls.httpd)
        cls.server_thread = threading.Thread(target=cls.httpd.serve_forever)
        cls.server_thread.daemon = True
        cls.server_thread.start()

    def _cert_path(self, filename):
        """Return the path of ``filename`` inside the certificate test data directory."""
        return os.path.join(self.certdir, filename)

    def _run_test(self, handler, **handler_kwargs):
        """Fetch ``/video.html`` from the server using a handler built from ``handler_kwargs``."""
        url = f'https://127.0.0.1:{self.port}/video.html'
        # Disable client-side validation of unacceptable self-signed testcert.pem
        # The test is of a check on the server side, so unaffected
        with handler(verify=False, **handler_kwargs) as rh:
            validate_and_send(rh, Request(url)).read().decode()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_combined_nopass(self, handler):
        client_cert = {
            'client_certificate': self._cert_path('clientwithkey.crt'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_nocombined_nopass(self, handler):
        client_cert = {
            'client_certificate': self._cert_path('client.crt'),
            'client_certificate_key': self._cert_path('client.key'),
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_combined_pass(self, handler):
        client_cert = {
            'client_certificate': self._cert_path('clientwithencryptedkey.crt'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_certificate_nocombined_pass(self, handler):
        client_cert = {
            'client_certificate': self._cert_path('client.crt'),
            'client_certificate_key': self._cert_path('clientencrypted.key'),
            'client_certificate_password': 'foobar',
        }
        self._run_test(handler, client_cert=client_cert)
bb58c9ed 745
bb58c9ed 746
class TestUrllibRequestHandler(TestRequestHandlerBase):
    """Tests for behaviour specific to the urllib-based request handler."""

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_file_urls(self, handler):
        # See https://github.com/ytdl-org/youtube-dl/issues/8227
        # delete=False and an explicit close() so the file can be reopened by name below
        tf = tempfile.NamedTemporaryFile(delete=False)
        tf.write(b'foobar')
        tf.close()
        try:
            req = Request(pathlib.Path(tf.name).as_uri())
            with handler() as rh:
                with pytest.raises(UnsupportedRequest):
                    rh.validate(req)

                # Test that urllib never loaded FileHandler
                with pytest.raises(TransportError):
                    rh.send(req)

            with handler(enable_file_urls=True) as rh:
                res = validate_and_send(rh, req)
                assert res.read() == b'foobar'
                res.close()
        finally:
            # Remove the temporary file even when an assertion above fails
            os.unlink(tf.name)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_error_returns_content(self, handler):
        # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
        def get_response():
            with handler() as rh:
                # headers url
                try:
                    validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
                except HTTPError as e:
                    return e.response

        assert get_response().read() == b'<html></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert_error_text(self, handler):
        # Check the output of the error message
        with handler() as rh:
            with pytest.raises(
                CertificateVerifyError,
                match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
            ):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('req,match,version_check', [
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
        # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
        (
            Request('http://127.0.0.1', method='GET\n'),
            'method can\'t contain control characters',
            lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
        # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
        (
            Request('http://127.0.0. 1', method='GET'),
            'URL can\'t contain control characters',
            lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
        (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
    ])
    def test_httplib_validation_errors(self, handler, req, match, version_check):
        # ``version_check`` returns True for Python versions lacking the validation under test
        if version_check and version_check(sys.version_info):
            pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')

        with handler() as rh:
            with pytest.raises(RequestError, match=match) as exc_info:
                validate_and_send(rh, req)
            # Must be a plain RequestError, not the TransportError subtype
            assert not isinstance(exc_info.value, TransportError)
820
821
def run_validation(handler, error, req, **handler_kwargs):
    """Validate `req` with a handler built from `handler_kwargs`.

    When `error` is truthy it is the exception type that validation is
    expected to raise; otherwise validation is expected to pass.
    """
    with handler(**handler_kwargs) as rh:
        if not error:
            rh.validate(req)
            return
        with pytest.raises(error):
            rh.validate(req)
829
830
class TestRequestHandlerValidation:
    """Table-driven tests of request validation for URL schemes, proxies and extensions.

    In the tables below the expectation column is either False (validation
    passes) or the exception type that validation must raise.
    """

    class ValidationRH(RequestHandler):
        """Minimal handler; sending always fails with RequestError('test')."""

        def _send(self, request):
            raise RequestError('test')

    class NoCheckRH(ValidationRH):
        """Handler with every _SUPPORTED_* attribute set to None."""

        _SUPPORTED_FEATURES = None
        _SUPPORTED_PROXY_SCHEMES = None
        _SUPPORTED_URL_SCHEMES = None

        def _check_extensions(self, extensions):
            # Empty the mapping in place
            extensions.clear()

    class HTTPSupportedRH(ValidationRH):
        """Handler that only declares the 'http' URL scheme."""

        _SUPPORTED_URL_SCHEMES = ('http',)

    URL_SCHEME_TESTS = [
        # scheme, expected to fail, handler kwargs
        ('Urllib', [
            ('http', False, {}),
            ('https', False, {}),
            ('data', False, {}),
            ('ftp', False, {}),
            ('file', UnsupportedRequest, {}),
            ('file', False, {'enable_file_urls': True}),
        ]),
        (NoCheckRH, [('http', False, {})]),
        (ValidationRH, [('http', UnsupportedRequest, {})])
    ]

    PROXY_SCHEME_TESTS = [
        # scheme, expected to fail
        ('Urllib', [
            ('http', False),
            ('https', UnsupportedRequest),
            ('socks4', False),
            ('socks4a', False),
            ('socks5', False),
            ('socks5h', False),
            ('socks', UnsupportedRequest),
        ]),
        (NoCheckRH, [('http', False)]),
        (HTTPSupportedRH, [('http', UnsupportedRequest)]),
    ]

    PROXY_KEY_TESTS = [
        # key, expected to fail
        ('Urllib', [
            ('all', False),
            ('unrelated', False),
        ]),
        (NoCheckRH, [('all', False)]),
        (HTTPSupportedRH, [('all', UnsupportedRequest)]),
        (HTTPSupportedRH, [('no', UnsupportedRequest)]),
    ]

    EXTENSION_TESTS = [
        # extensions, expected to fail
        ('Urllib', [
            ({'cookiejar': 'notacookiejar'}, AssertionError),
            ({'cookiejar': YoutubeDLCookieJar()}, False),
            ({'cookiejar': CookieJar()}, AssertionError),
            ({'timeout': 1}, False),
            ({'timeout': 'notatimeout'}, AssertionError),
            ({'unsupported': 'value'}, UnsupportedRequest),
        ]),
        (NoCheckRH, [
            ({'cookiejar': 'notacookiejar'}, False),
            ({'somerandom': 'test'}, False),  # but any extension is allowed through
        ]),
    ]

    @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
        (rh_key, scheme, fail, handler_kwargs)
        for rh_key, cases in URL_SCHEME_TESTS
        for scheme, fail, handler_kwargs in cases
    ], indirect=['handler'])
    def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
        """Validate a bare '<scheme>://' URL against each handler in URL_SCHEME_TESTS."""
        extra_kwargs = handler_kwargs or {}
        run_validation(handler, fail, Request(f'{scheme}://'), **extra_kwargs)

    @pytest.mark.parametrize('handler,fail', [('Urllib', False)], indirect=['handler'])
    def test_no_proxy(self, handler, fail):
        """A 'no' proxy entry is checked both on the request and on the handler."""
        no_proxy = {'no': '127.0.0.1,github.com'}
        # once attached to the request ...
        run_validation(handler, fail, Request('http://', proxies=dict(no_proxy)))
        # ... and once passed to the handler itself
        run_validation(handler, fail, Request('http://'), proxies=dict(no_proxy))

    @pytest.mark.parametrize('handler,proxy_key,fail', [
        (rh_key, proxy_key, fail)
        for rh_key, cases in PROXY_KEY_TESTS
        for proxy_key, fail in cases
    ], indirect=['handler'])
    def test_proxy_key(self, handler, proxy_key, fail):
        """Each proxy key is checked both on the request and on the handler."""
        keyed_proxy = {proxy_key: 'http://example.com'}
        run_validation(handler, fail, Request('http://', proxies=dict(keyed_proxy)))
        run_validation(handler, fail, Request('http://'), proxies=dict(keyed_proxy))

    @pytest.mark.parametrize('handler,scheme,fail', [
        (rh_key, scheme, fail)
        for rh_key, cases in PROXY_SCHEME_TESTS
        for scheme, fail in cases
    ], indirect=['handler'])
    def test_proxy_scheme(self, handler, scheme, fail):
        """Each proxy URL scheme is checked both on the request and on the handler."""
        schemed_proxy = {'http': f'{scheme}://example.com'}
        run_validation(handler, fail, Request('http://', proxies=dict(schemed_proxy)))
        run_validation(handler, fail, Request('http://'), proxies=dict(schemed_proxy))

    @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH], indirect=True)
    def test_empty_proxy(self, handler):
        """A proxy mapped to None must pass validation on the request and on the handler."""
        empty_proxy = {'http': None}
        run_validation(handler, False, Request('http://', proxies=dict(empty_proxy)))
        run_validation(handler, False, Request('http://'), proxies=dict(empty_proxy))

    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_invalid_proxy_url(self, handler, proxy_url):
        """Each of the listed proxy URLs must be rejected with UnsupportedRequest."""
        bad_request = Request('http://', proxies={'http': proxy_url})
        run_validation(handler, UnsupportedRequest, bad_request)

    @pytest.mark.parametrize('handler,extensions,fail', [
        (rh_key, extensions, fail)
        for rh_key, cases in EXTENSION_TESTS
        for extensions, fail in cases
    ], indirect=['handler'])
    def test_extension(self, handler, extensions, fail):
        """Validate a request carrying the given extensions mapping."""
        request = Request('http://', extensions=extensions)
        run_validation(handler, fail, request)

    def test_invalid_request_type(self):
        """validate() and send() both raise TypeError when given something that is not a Request."""
        validation_rh = self.ValidationRH(logger=FakeLogger())
        for entry_point in (validation_rh.validate, validation_rh.send):
            with pytest.raises(TypeError, match='Expected an instance of Request'):
                entry_point('not a request')
959
960
class FakeResponse(Response):
    """Empty-bodied response that remembers the request it was created for."""

    def __init__(self, request):
        # XXX: we could make request part of standard response interface
        self.request = request
        empty_body = io.BytesIO(b'')
        super().__init__(fp=empty_body, headers={}, url=request.url)
966
967
class FakeRH(RequestHandler):
    """Handler that answers with a FakeResponse, or raises SSLError for 'ssl://' URLs."""

    def _validate(self, request):
        # Nothing is checked here
        return

    def _send(self, request: Request):
        ssl_prefix = 'ssl://'
        if not request.url.startswith(ssl_prefix):
            return FakeResponse(request)
        # Whatever follows the prefix becomes the error message
        raise SSLError(request.url[len(ssl_prefix):])
977
978
class FakeRHYDL(FakeYDL):
    """FakeYDL whose request director is built from FakeRH only."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        fake_only_director = self.build_request_director([FakeRH])
        self._request_director = fake_only_director
983
984
class TestRequestDirector:
    """Tests for registering handlers on a RequestDirector and dispatching requests to them."""

    def test_handler_operations(self):
        """Handlers are stored by RH_KEY: same key overwrites, a new key adds, pop removes."""
        director = RequestDirector(logger=FakeLogger())
        handler = FakeRH(logger=FakeLogger())
        director.add_handler(handler)
        assert director.handlers.get(FakeRH.RH_KEY) is handler

        # Handler should overwrite
        handler2 = FakeRH(logger=FakeLogger())
        director.add_handler(handler2)
        assert director.handlers.get(FakeRH.RH_KEY) is not handler
        assert director.handlers.get(FakeRH.RH_KEY) is handler2
        assert len(director.handlers) == 1

        class AnotherFakeRH(FakeRH):
            pass
        director.add_handler(AnotherFakeRH(logger=FakeLogger()))
        assert len(director.handlers) == 2
        assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY

        director.handlers.pop(FakeRH.RH_KEY, None)
        assert director.handlers.get(FakeRH.RH_KEY) is None
        assert len(director.handlers) == 1

        # RequestErrors should passthrough
        with pytest.raises(SSLError):
            director.send(Request('ssl://something'))

    def test_send(self):
        """Sending without any handler raises RequestError; with FakeRH it yields a FakeResponse."""
        director = RequestDirector(logger=FakeLogger())
        with pytest.raises(RequestError):
            director.send(Request('any://'))
        director.add_handler(FakeRH(logger=FakeLogger()))
        assert isinstance(director.send(Request('http://')), FakeResponse)

    def test_unsupported_handlers(self):
        """Requests a handler does not support fall through to another handler, or fail if none is left."""
        class SupportedRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        director = RequestDirector(logger=FakeLogger())
        director.add_handler(SupportedRH(logger=FakeLogger()))
        director.add_handler(FakeRH(logger=FakeLogger()))

        # First should take preference
        assert director.send(Request('http://')).read() == b'supported'
        assert director.send(Request('any://')).read() == b''

        director.handlers.pop(FakeRH.RH_KEY)
        with pytest.raises(NoSupportingHandlers):
            director.send(Request('any://'))

    def test_unexpected_error(self):
        """A non-RequestError raised by a handler is reported, but does not stop other handlers."""
        director = RequestDirector(logger=FakeLogger())

        class UnexpectedRH(FakeRH):
            def _send(self, request: Request):
                raise TypeError('something')

        # Pass a logger instance (not the FakeLogger class), like every other handler in this file
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
            director.send(Request('any://'))

        director.handlers.clear()
        assert len(director.handlers) == 0

        # Should not be fatal
        director.add_handler(FakeRH(logger=FakeLogger()))
        director.add_handler(UnexpectedRH(logger=FakeLogger()))
        assert director.send(Request('any://'))

    def test_preference(self):
        """A preference function added to the director decides which handler serves a request."""
        director = RequestDirector(logger=FakeLogger())
        director.add_handler(FakeRH(logger=FakeLogger()))

        class SomeRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        def some_preference(rh, request):
            # Other handlers score 0; SomeRH scores 100 only when the
            # request has a 'prefer' header, and -1 otherwise
            return (0 if not isinstance(rh, SomeRH)
                    else 100 if 'prefer' in request.headers
                    else -1)

        director.add_handler(SomeRH(logger=FakeLogger()))
        director.preferences.add(some_preference)

        assert director.send(Request('http://')).read() == b''
        assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
1079
227bf1a3 1080
1081# XXX: do we want to move this to test_YoutubeDL.py?
class TestYoutubeDLNetworking:
    """Tests for how YoutubeDL builds request handlers and pre-processes requests in urlopen()."""

    @staticmethod
    def build_handler(ydl, handler: RequestHandler = FakeRH):
        """Build a request director containing only `handler` and return that handler instance."""
        return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)

    @staticmethod
    def _restore_env(key, previous):
        """Put environment variable `key` back to `previous`, removing it when it was unset (None)."""
        if previous is None:
            os.environ.pop(key, None)
        else:
            os.environ[key] = previous

    def test_compat_opener(self):
        """The deprecated `_opener` attribute is still a urllib OpenerDirector."""
        with FakeYDL() as ydl:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                assert isinstance(ydl._opener, urllib.request.OpenerDirector)

    @pytest.mark.parametrize('proxy,expected', [
        ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
        ('', {'all': '__noproxy__'}),
        (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'})  # env, set https
    ])
    def test_proxy(self, proxy, expected):
        """The 'proxy' parameter overrides HTTP_PROXY from the environment."""
        old_http_proxy = os.environ.get('HTTP_PROXY')
        try:
            os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081'  # ensure that provided proxies override env
            with FakeYDL({'proxy': proxy}) as ydl:
                assert ydl.proxies == expected
        finally:
            # Always undo the change, including removing the variable when it
            # was not set before, so it cannot leak into other tests
            self._restore_env('HTTP_PROXY', old_http_proxy)

    def test_compat_request(self):
        """urlopen() accepts a URL string and a urllib.request.Request, and rejects None."""
        with FakeRHYDL() as ydl:
            assert ydl.urlopen('test://')
            urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
            urllib_req.add_unredirected_header('Cookie', 'bob=bob')
            urllib_req.timeout = 2
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                req = ydl.urlopen(urllib_req).request
                assert req.url == urllib_req.get_full_url()
                assert req.data == urllib_req.data
                assert req.method == urllib_req.get_method()
                assert 'X-Test' in req.headers
                assert 'Cookie' in req.headers
                assert req.extensions.get('timeout') == 2

            with pytest.raises(AssertionError):
                ydl.urlopen(None)

    def test_extract_basic_auth(self):
        """Credentials embedded in the URL end up in a Basic Authorization header."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('http://user:pass@foo.bar'))
            assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'

    def test_sanitize_url(self):
        """The 'httpss://' scheme is corrected to 'https://' before sending."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('httpss://foo.bar'))
            assert res.request.url == 'https://foo.bar'

    def test_file_urls_error(self):
        """With the default handlers, a file:// URL fails with a message saying they are disabled."""
        # use urllib handler
        with FakeYDL() as ydl:
            with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
                ydl.urlopen('file://')

    def test_legacy_server_connect_error(self):
        """Certain SSL errors get a --legacy-server-connect hint; other SSL errors pass through."""
        with FakeRHYDL() as ydl:
            for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
                    ydl.urlopen(f'ssl://{error}')

            with pytest.raises(SSLError, match='testerror'):
                ydl.urlopen('ssl://testerror')

    @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
        ('http', '__noproxy__', None),
        ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
        ('https', 'example.com', 'http://example.com'),
        ('https', '//example.com', 'http://example.com'),
        ('https', 'socks5://example.com', 'socks5h://example.com'),
        ('http', 'socks://example.com', 'socks4://example.com'),
        ('http', 'socks4://example.com', 'socks4://example.com'),
        ('unrelated', '/bad/proxy', '/bad/proxy'),  # clean_proxies should ignore bad proxies
    ])
    def test_clean_proxy(self, proxy_key, proxy_url, expected):
        """Proxy URLs are normalised both in urlopen() and when a handler is built from the environment."""
        # proxies should be cleaned in urlopen()
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
            assert req.proxies[proxy_key] == expected

        # and should also be cleaned when building the handler
        env_key = f'{proxy_key.upper()}_PROXY'
        old_env_proxy = os.environ.get(env_key)
        try:
            os.environ[env_key] = proxy_url  # ensure that provided proxies override env
            with FakeYDL() as ydl:
                rh = self.build_handler(ydl)
                assert rh.proxies[proxy_key] == expected
        finally:
            # Always undo the change, including removing the variable when it
            # was not set before, so it cannot leak into other tests
            self._restore_env(env_key, old_env_proxy)

    def test_clean_proxy_header(self):
        """The 'ytdl-request-proxy' header is removed and turned into an 'all' proxy."""
        with FakeRHYDL() as ydl:
            req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
            assert 'ytdl-request-proxy' not in req.headers
            assert req.proxies == {'all': 'http://foo.bar'}

        with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'ytdl-request-proxy' not in rh.headers
            assert rh.proxies == {'all': 'http://foo.bar'}

    def test_clean_header(self):
        """The 'Youtubedl-no-compression' header is replaced by 'Accept-Encoding: identity'."""
        with FakeRHYDL() as ydl:
            res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
            assert 'Youtubedl-no-compression' not in res.request.headers
            assert res.request.headers.get('Accept-Encoding') == 'identity'

        with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
            rh = self.build_handler(ydl)
            assert 'Youtubedl-no-compression' not in rh.headers
            assert rh.headers.get('Accept-Encoding') == 'identity'

    def test_build_handler_params(self):
        """YoutubeDL parameters are passed on to the corresponding handler attributes."""
        with FakeYDL({
            'http_headers': {'test': 'testtest'},
            'socket_timeout': 2,
            'proxy': 'http://127.0.0.1:8080',
            'source_address': '127.0.0.45',
            'debug_printtraffic': True,
            'compat_opts': ['no-certifi'],
            'nocheckcertificate': True,
            'legacyserverconnect': True,
        }) as ydl:
            rh = self.build_handler(ydl)
            assert rh.headers.get('test') == 'testtest'
            assert 'Accept' in rh.headers  # ensure std_headers are still there
            assert rh.timeout == 2
            assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
            assert rh.source_address == '127.0.0.45'
            assert rh.verbose is True
            assert rh.prefer_system_certs is True
            assert rh.verify is False
            assert rh.legacy_ssl_support is True

    @pytest.mark.parametrize('ydl_params', [
        {'client_certificate': 'fakecert.crt'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
        {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
        {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
    ])
    def test_client_certificate(self, ydl_params):
        """The client certificate parameters reach the handler unchanged."""
        with FakeYDL(ydl_params) as ydl:
            rh = self.build_handler(ydl)
            assert rh._client_cert == ydl_params  # XXX: Too bound to implementation

    def test_urllib_file_urls(self):
        """The 'enable_file_urls' parameter is passed on to the urllib handler."""
        with FakeYDL({'enable_file_urls': False}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is False

        with FakeYDL({'enable_file_urls': True}) as ydl:
            rh = self.build_handler(ydl, UrllibRH)
            assert rh.enable_file_urls is True
1244
1245
class TestRequest:
    """Tests for the attributes and helper methods of the Request object."""

    def test_query(self):
        """Query parameters are merged into the URL on construction and on update()."""
        request = Request('http://example.com?q=something', query={'v': 'xyz'})
        assert request.url == 'http://example.com?q=something&v=xyz'

        request.update(query={'v': '123'})
        assert request.url == 'http://example.com?q=something&v=123'
        request.update(url='http://example.com', query={'v': 'xyz'})
        assert request.url == 'http://example.com?v=xyz'

    def test_method(self):
        """The method follows the presence of data until it is set explicitly."""
        request = Request('http://example.com')
        assert request.method == 'GET'

        # Without an explicit method it tracks whether data is present
        request.data = b'test'
        assert request.method == 'POST'
        request.data = None
        assert request.method == 'GET'

        # An explicit method stays put when data changes
        request.data = b'test2'
        request.method = 'PUT'
        assert request.method == 'PUT'
        request.data = None
        assert request.method == 'PUT'

        with pytest.raises(TypeError):
            request.method = 1

    def test_request_helpers(self):
        """HEADRequest and PUTRequest preset their method."""
        head_request = HEADRequest('http://example.com')
        put_request = PUTRequest('http://example.com')
        assert head_request.method == 'HEAD'
        assert put_request.method == 'PUT'

    def test_headers(self):
        """Headers are held in an HTTPHeaderDict and only accept mapping-like values."""
        request = Request('http://example.com', headers={'tesT': 'test'})
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        request.update(headers={'teSt2': 'test2'})
        assert request.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})

        replacement = HTTPHeaderDict({'test': 'test'})
        request.headers = replacement
        assert request.headers == HTTPHeaderDict({'test': 'test'})
        assert request.headers is replacement

        # test converts dict to case insensitive dict
        plain_dict = {'test2': 'test2'}
        request.headers = plain_dict
        assert isinstance(request.headers, HTTPHeaderDict)
        assert request.headers is not plain_dict

        with pytest.raises(TypeError):
            request.headers = None

    def test_data_type(self):
        """Only bytes, iterables of bytes and file-like objects are accepted as data."""
        request = Request('http://example.com')
        assert request.data is None

        # test bytes is allowed
        request.data = b'test'
        assert request.data == b'test'

        # test iterable of bytes is allowed
        chunks = [b'test', b'test2']
        request.data = chunks
        assert request.data == chunks

        # test file-like object is allowed
        stream = io.BytesIO(b'test')
        request.data = stream
        assert request.data == stream

        # common mistake: test str not allowed
        with pytest.raises(TypeError):
            request.data = 'test'
        assert request.data != 'test'

        # common mistake: test dict is not allowed
        with pytest.raises(TypeError):
            request.data = {'test': 'test'}
        assert request.data != {'test': 'test'}

    def test_content_length_header(self):
        """Content-Length is dropped when data is replaced and when there is no data."""
        with_body = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
        assert with_body.headers.get('Content-Length') == '0'

        with_body.data = b'test'
        assert 'Content-Length' not in with_body.headers

        without_body = Request('http://example.com', headers={'Content-Length': '10'})
        assert 'Content-Length' not in without_body.headers

    def test_content_type_header(self):
        """Content-Type survives data changes, is removed with the data and defaults for new data."""
        request = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
        assert request.headers.get('Content-Type') == 'test'

        request.data = b'test2'
        assert request.headers.get('Content-Type') == 'test'

        request.data = None
        assert 'Content-Type' not in request.headers

        request.data = b'test3'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_update_req(self):
        """update(data=b'') turns a GET request into a POST with an empty body."""
        request = Request('http://example.com')
        assert request.data is None
        assert request.method == 'GET'
        assert 'Content-Type' not in request.headers

        # Test that zero-byte payloads will be sent
        request.update(data=b'')
        assert request.data == b''
        assert request.method == 'POST'
        assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

    def test_proxies(self):
        """Proxies passed to the constructor are exposed unchanged."""
        request = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'})
        assert request.proxies == {'http': 'http://127.0.0.1:8080'}

    def test_extensions(self):
        """Extensions passed to the constructor are exposed unchanged."""
        request = Request(url='http://example.com', extensions={'timeout': 2})
        assert request.extensions == {'timeout': 2}

    def test_copy(self):
        """copy() duplicates headers, proxies and the extensions mapping but shares the data."""
        original = Request(
            url='http://example.com',
            extensions={'cookiejar': CookieJar()},
            headers={'Accept-Encoding': 'br'},
            proxies={'http': 'http://127.0.0.1'},
            data=[b'123']
        )
        duplicate = original.copy()
        assert duplicate is not original
        assert duplicate.url == original.url
        assert duplicate.headers == original.headers
        assert duplicate.headers is not original.headers
        assert duplicate.proxies == original.proxies
        assert duplicate.proxies is not original.proxies

        # Data is not able to be copied
        assert duplicate.data == original.data
        assert duplicate.data is original.data

        # Shallow copy extensions
        assert duplicate.extensions is not original.extensions
        assert duplicate.extensions['cookiejar'] == original.extensions['cookiejar']

        # Subclasses are copied by default
        class AnotherRequest(Request):
            pass

        subclass_request = AnotherRequest(url='http://127.0.0.1')
        assert isinstance(subclass_request.copy(), AnotherRequest)

    def test_url(self):
        """URLs are escaped and given a scheme when missing; a non-string URL is rejected."""
        escaped = Request(url='https://фtest.example.com/ some spaceв?ä=c')
        assert escaped.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'

        scheme_less = Request(url='//example.com')
        assert scheme_less.url == 'http://example.com'

        with pytest.raises(TypeError):
            Request(url='https://').url = None
1398
1399
class TestResponse:
    """Tests for the Response wrapper: reason text, header access and deprecated aliases."""

    @pytest.mark.parametrize('reason,status,expected', [
        ('custom', 200, 'custom'),
        (None, 404, 'Not Found'),  # fallback status
        ('', 403, 'Forbidden'),
        (None, 999, None)
    ])
    def test_reason(self, reason, status, expected):
        """A missing or empty reason falls back to the phrase for the status, if there is one."""
        response = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
        assert response.reason == expected

    def test_headers(self):
        """Repeated headers are all kept and header lookup ignores case."""
        raw_headers = Message()
        for name, value in (
            ('Test', 'test'),
            ('Test', 'test2'),
            ('content-encoding', 'br'),
        ):
            raw_headers.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=raw_headers, url='test://')
        assert response.headers.get_all('test') == ['test', 'test2']
        assert 'Content-Encoding' in response.headers

    def test_get_header(self):
        """get_header() joins repeated values, except Set-Cookie where only the first is returned."""
        raw_headers = Message()
        for name, value in (
            ('Set-Cookie', 'cookie1'),
            ('Set-cookie', 'cookie2'),
            ('Test', 'test'),
            ('Test', 'test2'),
        ):
            raw_headers.add_header(name, value)

        response = Response(io.BytesIO(b''), headers=raw_headers, url='test://')
        assert response.get_header('test') == 'test, test2'
        assert response.get_header('set-Cookie') == 'cookie1'
        assert response.get_header('notexist', 'default') == 'default'

    def test_compat(self):
        """The deprecated urllib-style accessors agree with the current attributes."""
        response = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=DeprecationWarning)
            assert response.code == response.getcode() == response.status
            assert response.geturl() == response.url
            assert response.info() is response.headers
            assert response.getheader('test') == response.get_header('test')