X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/aa7785f860be0bae7135ee32fe0ef4f0ab00bbc1..54a63e80af82791d2f0985bd0176bb182963fd5f:/test/test_utils.py diff --git a/test/test_utils.py b/test/test_utils.py index d84c3d3ee..251739686 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,81 +1,109 @@ #!/usr/bin/env python3 -# coding: utf-8 - -from __future__ import unicode_literals # Allow direct execution import os import sys import unittest +import warnings +import datetime as dt + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -# Various small unit tests +import contextlib import io import itertools import json +import subprocess import xml.etree.ElementTree +from yt_dlp.compat import ( + compat_etree_fromstring, + compat_HTMLParseError, + compat_os_name, +) from yt_dlp.utils import ( + Config, + DateRange, + ExtractorError, + InAdvancePagedList, + LazyList, + NO_DEFAULT, + OnDemandPagedList, + Popen, age_restricted, args_to_str, - encode_base_n, + base_url, caesar, clean_html, clean_podcast_url, + cli_bool_option, + cli_option, + cli_valueless_option, date_from_str, datetime_from_str, - DateRange, detect_exe_version, determine_ext, - dict_get, + determine_file_encoding, + dfxp2srt, + encode_base_n, encode_compat_str, encodeFilename, - escape_rfc3986, - escape_url, + expand_path, extract_attributes, - ExtractorError, + extract_basic_auth, find_xpath_attr, fix_xml_ampersands, float_or_none, - get_element_by_class, + format_bytes, + get_compatible_ext, get_element_by_attribute, - get_elements_by_class, + get_element_by_class, + get_element_html_by_attribute, + get_element_html_by_class, + get_element_text_and_html_by_tag, get_elements_by_attribute, - InAdvancePagedList, + get_elements_by_class, + get_elements_html_by_attribute, + get_elements_html_by_class, + get_elements_text_and_html_by_attribute, int_or_none, intlist_to_bytes, + iri_to_uri, is_html, js_to_json, limit_length, + locked_file, + lowercase_escape, + match_str, merge_dicts, mimetype2ext, month_by_name, multipart_encode, ohdave_rsa_encrypt, - OnDemandPagedList, orderedSet, parse_age_limit, + parse_bitrate, + parse_codecs, + parse_count, + parse_dfxp_time_expr, parse_duration, parse_filesize, - parse_count, parse_iso8601, - parse_resolution, - parse_bitrate, parse_qs, + parse_resolution, pkcs1pad, - read_batch_urls, - sanitize_filename, - sanitize_path, - sanitize_url, - sanitized_Request, - expand_path, prepend_extension, - replace_extension, - remove_start, + read_batch_urls, remove_end, remove_quotes, + remove_start, + render_table, + replace_extension, rot47, + sanitize_filename, + sanitize_path, + sanitize_url, shell_quote, smuggle_url, str_to_int, @@ -83,41 +111,30 @@ strip_or_none, subtitles_filename, timeconvert, + try_call, unescapeHTML, unified_strdate, unified_timestamp, unsmuggle_url, + update_url_query, uppercase_escape, - lowercase_escape, url_basename, url_or_none, - base_url, - urljoin, urlencode_postdata, + urljoin, urshift, - update_url_query, + variadic, version_tuple, - xpath_with_ns, + xpath_attr, xpath_element, xpath_text, - xpath_attr, - render_table, - match_str, - parse_dfxp_time_expr, - dfxp2srt, - cli_option, - cli_valueless_option, - cli_bool_option, - parse_codecs, - iri_to_uri, - LazyList, + xpath_with_ns, ) -from yt_dlp.compat import ( - compat_chr, - compat_etree_fromstring, - compat_getenv, - compat_os_name, - compat_setenv, +from yt_dlp.utils.networking import ( + HTTPHeaderDict, + escape_rfc3986, + normalize_url, + remove_dot_segments, ) @@ -133,13 +150,13 @@ def test_sanitize_filename(self): self.assertEqual(sanitize_filename('123'), '123') - self.assertEqual('abc_de', sanitize_filename('abc/de')) + self.assertEqual('abc⧸de', sanitize_filename('abc/de')) self.assertFalse('/' in sanitize_filename('abc/de///')) - self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de')) - self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|')) - self.assertEqual('yes no', sanitize_filename('yes? no')) - self.assertEqual('this - that', sanitize_filename('this: that')) + self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', is_id=False)) + self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', is_id=False)) + self.assertEqual('yes no', sanitize_filename('yes? no', is_id=False)) + self.assertEqual('this - that', sanitize_filename('this: that', is_id=False)) self.assertEqual(sanitize_filename('AT&T'), 'AT&T') aumlaut = 'ä' @@ -151,10 +168,12 @@ def test_sanitize_filename(self): sanitize_filename('New World record at 0:12:34'), 'New World record at 0_12_34') - self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf') + self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf') self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf') - self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf') + self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf') + self.assertEqual(sanitize_filename('.gasdgf'), '.gasdgf') self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf') + self.assertEqual(sanitize_filename('.gasdgf', is_id=False), 'gasdgf') forbidden = '"\0\\/' for fc in forbidden: @@ -243,26 +262,24 @@ def test_sanitize_url(self): self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar') self.assertEqual(sanitize_url('foo bar'), 'foo bar') - def test_extract_basic_auth(self): - auth_header = lambda url: sanitized_Request(url).get_header('Authorization') - self.assertFalse(auth_header('http://foo.bar')) - self.assertFalse(auth_header('http://:foo.bar')) - self.assertEqual(auth_header('http://@foo.bar'), 'Basic Og==') - self.assertEqual(auth_header('http://:pass@foo.bar'), 'Basic OnBhc3M=') - self.assertEqual(auth_header('http://user:@foo.bar'), 'Basic dXNlcjo=') - self.assertEqual(auth_header('http://user:pass@foo.bar'), 'Basic dXNlcjpwYXNz') - def test_expand_path(self): def env(var): - return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var) + return f'%{var}%' if sys.platform == 'win32' else f'${var}' - compat_setenv('yt_dlp_EXPATH_PATH', 'expanded') + os.environ['yt_dlp_EXPATH_PATH'] = 'expanded' self.assertEqual(expand_path(env('yt_dlp_EXPATH_PATH')), 'expanded') - self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME')) - self.assertEqual(expand_path('~'), compat_getenv('HOME')) - self.assertEqual( - expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')), - '%s/expanded' % compat_getenv('HOME')) + + old_home = os.environ.get('HOME') + test_str = R'C:\Documents and Settings\тест\Application Data' + try: + os.environ['HOME'] = test_str + self.assertEqual(expand_path(env('HOME')), os.getenv('HOME')) + self.assertEqual(expand_path('~'), os.getenv('HOME')) + self.assertEqual( + expand_path('~/{}'.format(env('yt_dlp_EXPATH_PATH'))), + '{}/expanded'.format(os.getenv('HOME'))) + finally: + os.environ['HOME'] = old_home or '' def test_prepend_extension(self): self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') @@ -339,12 +356,12 @@ def test_datetime_from_str(self): self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto')) def test_daterange(self): - _20century = DateRange("19000101", "20000101") - self.assertFalse("17890714" in _20century) - _ac = DateRange("00010101") - self.assertTrue("19690721" in _ac) - _firstmilenium = DateRange(end="10000101") - self.assertTrue("07110427" in _firstmilenium) + _20century = DateRange('19000101', '20000101') + self.assertFalse('17890714' in _20century) + _ac = DateRange('00010101') + self.assertTrue('19690721' in _ac) + _firstmilenium = DateRange(end='10000101') + self.assertTrue('07110427' in _firstmilenium) def test_unified_dates(self): self.assertEqual(unified_strdate('December 21, 2010'), '20101221') @@ -353,6 +370,7 @@ def test_unified_dates(self): self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') self.assertEqual(unified_strdate('1968 12 10'), '19681210') self.assertEqual(unified_strdate('1968-12-10'), '19681210') + self.assertEqual(unified_strdate('31-07-2022 20:00'), '20220731') self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128') self.assertEqual( unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False), @@ -396,6 +414,10 @@ def test_unified_timestamps(self): self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140) self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363) + self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1) + self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86) + self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78) + def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None) @@ -484,7 +506,7 @@ def test_xpath_attr(self): self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True) def test_smuggle_url(self): - data = {"ö": "ö", "abc": [3]} + data = {'ö': 'ö', 'abc': [3]} url = 'https://foo.bar/baz?x=y#a' smug_url = smuggle_url(url, data) unsmug_url, unsmug_data = unsmuggle_url(smug_url) @@ -526,9 +548,6 @@ def test_str_to_int(self): self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123.456'), 123456) self.assertEqual(str_to_int(523), 523) - # Python 3 has no long - if sys.version_info < (3, 0): - eval('self.assertEqual(str_to_int(123456L), 123456)') self.assertEqual(str_to_int('noninteger'), None) self.assertEqual(str_to_int([]), None) @@ -548,6 +567,7 @@ def test_base_url(self): self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/') self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/') self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/') + self.assertEqual(base_url('http://foo.de/bar/baz&x=z&w=y/x/c'), 'http://foo.de/bar/baz&x=z&w=y/x/') def test_urljoin(self): self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') @@ -616,6 +636,8 @@ def test_parse_duration(self): self.assertEqual(parse_duration('3h 11m 53s'), 11513) self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513) self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513) + self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513) + self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513) self.assertEqual(parse_duration('62m45s'), 3765) self.assertEqual(parse_duration('6m59s'), 419) self.assertEqual(parse_duration('49s'), 49) @@ -634,6 +656,10 @@ def test_parse_duration(self): self.assertEqual(parse_duration('PT1H0.040S'), 3600.04) self.assertEqual(parse_duration('PT00H03M30SZ'), 210) self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88) + self.assertEqual(parse_duration('01:02:03:050'), 3723.05) + self.assertEqual(parse_duration('103:050'), 103.05) + self.assertEqual(parse_duration('1HR 3MIN'), 3780) + self.assertEqual(parse_duration('2hrs 3mins'), 7380) def test_fix_xml_ampersands(self): self.assertEqual( @@ -653,8 +679,7 @@ def testPL(size, pagesize, sliceargs, expected): def get_page(pagenum): firstid = pagenum * pagesize upto = min(size, pagenum * pagesize + pagesize) - for i in range(firstid, upto): - yield i + yield from range(firstid, upto) pl = OnDemandPagedList(get_page, pagesize) got = pl.getslice(*sliceargs) @@ -723,33 +748,11 @@ def test_multipart_encode(self): multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0], b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n') self.assertEqual( - multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0], + multipart_encode({'欄位'.encode(): '值'.encode()}, boundary='AAAAAA')[0], b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n') self.assertRaises( ValueError, multipart_encode, {b'field': b'value'}, boundary='value') - def test_dict_get(self): - FALSE_VALUES = { - 'none': None, - 'false': False, - 'zero': 0, - 'empty_string': '', - 'empty_list': [], - } - d = FALSE_VALUES.copy() - d['a'] = 42 - self.assertEqual(dict_get(d, 'a'), 42) - self.assertEqual(dict_get(d, 'b'), None) - self.assertEqual(dict_get(d, 'b', 42), 42) - self.assertEqual(dict_get(d, ('a', )), 42) - self.assertEqual(dict_get(d, ('b', 'a', )), 42) - self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42) - self.assertEqual(dict_get(d, ('b', 'c', )), None) - self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42) - for key, false_value in FALSE_VALUES.items(): - self.assertEqual(dict_get(d, ('b', 'c', key, )), None) - self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value) - def test_merge_dicts(self): self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2}) self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1}) @@ -767,6 +770,11 @@ def test_encode_compat_str(self): def test_parse_iso8601(self): self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266) + self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00'), 1395641066) + self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=dt.timedelta(hours=-7)), 1395641066) + self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=NO_DEFAULT), None) + # default does not override timezone in date_str + self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00', timezone=dt.timedelta(hours=-10)), 1395641066) self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266) @@ -776,7 +784,7 @@ def test_parse_iso8601(self): def test_strip_jsonp(self): stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);') d = json.loads(stripped) - self.assertEqual(d, [{"id": "532cb", "x": 3}]) + self.assertEqual(d, [{'id': '532cb', 'x': 3}]) stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc') d = json.loads(stripped) @@ -881,7 +889,7 @@ def test_parse_codecs(self): 'dynamic_range': 'HDR10', }) self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), { - 'vcodec': 'av01.0.12M.10', + 'vcodec': 'av01.0.12M.10.0.110.09.16.09.0', 'acodec': 'none', 'dynamic_range': 'HDR10', }) @@ -911,24 +919,124 @@ def test_escape_rfc3986(self): self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar') self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar') - def test_escape_url(self): + def test_normalize_url(self): self.assertEqual( - escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'), - 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4' + normalize_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'), + 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4', ) self.assertEqual( - escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'), - 'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290' + normalize_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'), + 'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290', ) self.assertEqual( - escape_url('http://тест.рф/фрагмент'), - 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82' + normalize_url('http://тест.рф/фрагмент'), + 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82', ) self.assertEqual( - escape_url('http://тест.рф/абв?абв=абв#абв'), - 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2' + normalize_url('http://тест.рф/абв?абв=абв#абв'), + 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2', + ) + self.assertEqual(normalize_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') + + self.assertEqual(normalize_url('http://www.example.com/../a/b/../c/./d.html'), 'http://www.example.com/a/c/d.html') + + def test_remove_dot_segments(self): + self.assertEqual(remove_dot_segments('/a/b/c/./../../g'), '/a/g') + self.assertEqual(remove_dot_segments('mid/content=5/../6'), 'mid/6') + self.assertEqual(remove_dot_segments('/ad/../cd'), '/cd') + self.assertEqual(remove_dot_segments('/ad/../cd/'), '/cd/') + self.assertEqual(remove_dot_segments('/..'), '/') + self.assertEqual(remove_dot_segments('/./'), '/') + self.assertEqual(remove_dot_segments('/./a'), '/a') + self.assertEqual(remove_dot_segments('/abc/./.././d/././e/.././f/./../../ghi'), '/ghi') + self.assertEqual(remove_dot_segments('/'), '/') + self.assertEqual(remove_dot_segments('/t'), '/t') + self.assertEqual(remove_dot_segments('t'), 't') + self.assertEqual(remove_dot_segments(''), '') + self.assertEqual(remove_dot_segments('/../a/b/c'), '/a/b/c') + self.assertEqual(remove_dot_segments('../a'), 'a') + self.assertEqual(remove_dot_segments('./a'), 'a') + self.assertEqual(remove_dot_segments('.'), '') + self.assertEqual(remove_dot_segments('////'), '////') + + def test_js_to_json_vars_strings(self): + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'null': a, + 'nullStr': b, + 'true': c, + 'trueStr': d, + 'false': e, + 'falseStr': f, + 'unresolvedVar': g, + }''', + { + 'a': 'null', + 'b': '"null"', + 'c': 'true', + 'd': '"true"', + 'e': 'false', + 'f': '"false"', + 'g': 'var', + }, + )), + { + 'null': None, + 'nullStr': 'null', + 'true': True, + 'trueStr': 'true', + 'false': False, + 'falseStr': 'false', + 'unresolvedVar': 'var', + }, + ) + + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'int': a, + 'intStr': b, + 'float': c, + 'floatStr': d, + }''', + { + 'a': '123', + 'b': '"123"', + 'c': '1.23', + 'd': '"1.23"', + }, + )), + { + 'int': 123, + 'intStr': '123', + 'float': 1.23, + 'floatStr': '1.23', + }, + ) + + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'object': a, + 'objectStr': b, + 'array': c, + 'arrayStr': d, + }''', + { + 'a': '{}', + 'b': '"{}"', + 'c': '[]', + 'd': '"[]"', + }, + )), + { + 'object': {}, + 'objectStr': '{}', + 'array': [], + 'arrayStr': '[]', + }, ) - self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') def test_js_to_json_realworld(self): inp = '''{ @@ -973,7 +1081,7 @@ def test_js_to_json_realworld(self): def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") - self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) + self.assertEqual(json.loads(on), {'abc_def': "1'\\2\\'3\"4"}) on = js_to_json('{"abc": true}') self.assertEqual(json.loads(on), {'abc': True}) @@ -1005,9 +1113,9 @@ def test_js_to_json_edgecases(self): 'c': 0, 'd': 42.42, 'e': [], - 'f': "abc", - 'g': "", - '42': 42 + 'f': 'abc', + 'g': '', + '42': 42, }) on = js_to_json('["abc", "def",]') @@ -1076,10 +1184,34 @@ def test_js_to_json_edgecases(self): on = js_to_json('[1,//{},\n2]') self.assertEqual(json.loads(on), [1, 2]) + on = js_to_json(R'"\^\$\#"') + self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped') + + on = js_to_json('\'"\\""\'') + self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped') + + on = js_to_json('[new Date("spam"), \'("eggs")\']') + self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should match a single string') + def test_js_to_json_malformed(self): self.assertEqual(js_to_json('42a1'), '42"a1"') self.assertEqual(js_to_json('42a-1'), '42"a"-1') + def test_js_to_json_template_literal(self): + self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"') + self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"') + self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"') + self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""') + self.assertEqual(js_to_json('`${name}`', {}), '"name"') + + def test_js_to_json_common_constructors(self): + self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5}) + self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10]) + self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5]) + self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5}) + self.assertEqual(json.loads(js_to_json('new Date("123")')), '123') + self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), '2023-10-19') + def test_extract_attributes(self): self.assertEqual(extract_attributes(''), {'x': 'y'}) self.assertEqual(extract_attributes(""), {'x': 'y'}) @@ -1111,7 +1243,7 @@ def test_extract_attributes(self): self.assertEqual(extract_attributes(''), {'x': 'décompose\u0301'}) # "Narrow" Python builds don't support unicode code points outside BMP. try: - compat_chr(0x10000) + chr(0x10000) supports_outside_bmp = True except ValueError: supports_outside_bmp = False @@ -1122,7 +1254,7 @@ def test_extract_attributes(self): def test_clean_html(self): self.assertEqual(clean_html('a:\nb'), 'a: b') - self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') + self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') self.assertEqual(clean_html('a
\xa0b'), 'a\nb') def test_intlist_to_bytes(self): @@ -1133,7 +1265,7 @@ def test_intlist_to_bytes(self): def test_args_to_str(self): self.assertEqual( args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), - 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""' + 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""', ) def test_parse_filesize(self): @@ -1156,19 +1288,29 @@ def test_parse_count(self): self.assertEqual(parse_count('1000'), 1000) self.assertEqual(parse_count('1.000'), 1000) self.assertEqual(parse_count('1.1k'), 1100) + self.assertEqual(parse_count('1.1 k'), 1100) + self.assertEqual(parse_count('1,1 k'), 1100) self.assertEqual(parse_count('1.1kk'), 1100000) self.assertEqual(parse_count('1.1kk '), 1100000) + self.assertEqual(parse_count('1,1kk'), 1100000) + self.assertEqual(parse_count('100 views'), 100) + self.assertEqual(parse_count('1,100 views'), 1100) self.assertEqual(parse_count('1.1kk views'), 1100000) + self.assertEqual(parse_count('10M views'), 10000000) + self.assertEqual(parse_count('has 10M views'), 10000000) def test_parse_resolution(self): self.assertEqual(parse_resolution(None), {}) self.assertEqual(parse_resolution(''), {}) - self.assertEqual(parse_resolution('1920x1080'), {'width': 1920, 'height': 1080}) - self.assertEqual(parse_resolution('1920×1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution(' 1920x1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920×1080 '), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('720p'), {'height': 720}) self.assertEqual(parse_resolution('4k'), {'height': 2160}) self.assertEqual(parse_resolution('8K'), {'height': 4320}) + self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('ep1x2'), {}) + self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) def test_parse_bitrate(self): self.assertEqual(parse_bitrate(None), None) @@ -1206,10 +1348,10 @@ def test_is_html(self): self.assertTrue(is_html( # UTF-8 with BOM b'\xef\xbb\xbf\xaaa')) self.assertTrue(is_html( # UTF-16-LE - b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00' + b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00', )) self.assertTrue(is_html( # UTF-16-BE - b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4' + b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4', )) self.assertTrue(is_html( # UTF-32-BE b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4')) @@ -1217,14 +1359,51 @@ def test_is_html(self): b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00')) def test_render_table(self): + self.assertEqual( + render_table( + ['a', 'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]]), + 'a empty bcd\n' + '123 4\n' + '9999 51') + + self.assertEqual( + render_table( + ['a', 'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]], + hide_empty=True), + 'a bcd\n' + '123 4\n' + '9999 51') + + self.assertEqual( + render_table( + ['\ta', 'bcd'], + [['1\t23', 4], ['\t9999', 51]]), + ' a bcd\n' + '1 23 4\n' + '9999 51') + self.assertEqual( render_table( ['a', 'bcd'], - [[123, 4], [9999, 51]]), + [[123, 4], [9999, 51]], + delim='-'), 'a bcd\n' + '--------\n' '123 4\n' '9999 51') + self.assertEqual( + render_table( + ['a', 'bcd'], + [[123, 4], [9999, 51]], + delim='-', extra_gap=2), + 'a bcd\n' + '----------\n' + '123 4\n' + '9999 51') + def test_match_str(self): # Unary self.assertFalse(match_str('xy', {'x': 1200})) @@ -1337,7 +1516,7 @@ def test_dfxp2srt(self):

Ignored, three

- '''.encode('utf-8') + '''.encode() srt_data = '''1 00:00:00,000 --> 00:00:01,000 The following line contains Chinese characters and special symbols @@ -1355,14 +1534,14 @@ def test_dfxp2srt(self): ''' self.assertEqual(dfxp2srt(dfxp_data), srt_data) - dfxp_data_no_default_namespace = ''' + dfxp_data_no_default_namespace = b'''

The first line

-
'''.encode('utf-8') + ''' srt_data = '''1 00:00:00,000 --> 00:00:01,000 The first line @@ -1370,7 +1549,7 @@ def test_dfxp2srt(self): ''' self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) - dfxp_data_with_style = ''' + dfxp_data_with_style = b''' @@ -1388,7 +1567,7 @@ def test_dfxp2srt(self):

inner
style

-
'''.encode('utf-8') +''' srt_data = '''1 00:00:02,080 --> 00:00:05,840 default stylecustom style @@ -1526,46 +1705,119 @@ def test_urshift(self): self.assertEqual(urshift(3, 1), 1) self.assertEqual(urshift(-3, 1), 2147483646) + GET_ELEMENT_BY_CLASS_TEST_STRING = ''' + nice + ''' + def test_get_element_by_class(self): - html = ''' - nice - ''' + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING self.assertEqual(get_element_by_class('foo', html), 'nice') self.assertEqual(get_element_by_class('no-such-class', html), None) + def test_get_element_html_by_class(self): + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING + + self.assertEqual(get_element_html_by_class('foo', html), html.strip()) + self.assertEqual(get_element_by_class('no-such-class', html), None) + + GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING = ''' + + ''' + def test_get_element_by_attribute(self): - html = ''' - nice - ''' + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice') self.assertEqual(get_element_by_attribute('class', 'foo', html), None) self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None) - html = ''' - - ''' + html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo') + def test_get_element_html_by_attribute(self): + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING + + self.assertEqual(get_element_html_by_attribute('class', 'foo bar', html), html.strip()) + self.assertEqual(get_element_html_by_attribute('class', 'foo', html), None) + self.assertEqual(get_element_html_by_attribute('class', 'no-such-foo', html), None) + + html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING + + self.assertEqual(get_element_html_by_attribute('itemprop', 'author', html), html.strip()) + + GET_ELEMENTS_BY_CLASS_TEST_STRING = ''' + nicealso nice + ''' + GET_ELEMENTS_BY_CLASS_RES = ['nice', 'also nice'] + def test_get_elements_by_class(self): - html = ''' - nicealso nice - ''' + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice']) self.assertEqual(get_elements_by_class('no-such-class', html), []) + def test_get_elements_html_by_class(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual(get_elements_html_by_class('foo', html), self.GET_ELEMENTS_BY_CLASS_RES) + self.assertEqual(get_elements_html_by_class('no-such-class', html), []) + def test_get_elements_by_attribute(self): - html = ''' - nicealso nice - ''' + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice']) self.assertEqual(get_elements_by_attribute('class', 'foo', html), []) self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), []) + def test_get_elements_html_by_attribute(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual(get_elements_html_by_attribute('class', 'foo bar', html), self.GET_ELEMENTS_BY_CLASS_RES) + self.assertEqual(get_elements_html_by_attribute('class', 'foo', html), []) + self.assertEqual(get_elements_html_by_attribute('class', 'no-such-foo', html), []) + + def test_get_elements_text_and_html_by_attribute(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual( + list(get_elements_text_and_html_by_attribute('class', 'foo bar', html)), + list(zip(['nice', 'also nice'], self.GET_ELEMENTS_BY_CLASS_RES))) + self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'foo', html)), []) + self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'no-such-foo', html)), []) + + self.assertEqual(list(get_elements_text_and_html_by_attribute( + 'class', 'foo', 'nicenice', tag='a')), [('nice', 'nice')]) + + GET_ELEMENT_BY_TAG_TEST_STRING = ''' + random text lorem ipsum

+
+ this should be returned + this should also be returned +
+ this should also be returned +
+ closing tag above should not trick, so this should also be returned +
+ but this text should not be returned + ''' + GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[32:276] + GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT = GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML[5:-6] + GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119] + GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7] + + def test_get_element_text_and_html_by_tag(self): + html = self.GET_ELEMENT_BY_TAG_TEST_STRING + + self.assertEqual( + get_element_text_and_html_by_tag('div', html), + (self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT, self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML)) + self.assertEqual( + get_element_text_and_html_by_tag('span', html), + (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML)) + self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html) + def test_iri_to_uri(self): self.assertEqual( iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'), @@ -1595,6 +1847,8 @@ def test_iri_to_uri(self): def test_clean_podcast_url(self): self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3') self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3') + self.assertEqual(clean_podcast_url('https://pdst.fm/e/2.gum.fm/chtbl.com/track/chrt.fm/track/34D33/pscrb.fm/rss/p/traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661'), 'https://traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661') + self.assertEqual(clean_podcast_url('https://pdst.fm/e/https://mgln.ai/e/441/www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3'), 'https://www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3') def test_LazyList(self): it = list(range(10)) @@ -1617,30 +1871,234 @@ def test_LazyList(self): self.assertEqual(repr(LazyList(it)), repr(it)) self.assertEqual(str(LazyList(it)), str(it)) - self.assertEqual(list(LazyList(it).reverse()), it[::-1]) - self.assertEqual(list(LazyList(it).reverse()[1:3:7]), it[::-1][1:3:7]) - self.assertEqual(list(LazyList(it).reverse()[::-1]), it) + self.assertEqual(list(LazyList(it, reverse=True)), it[::-1]) + self.assertEqual(list(reversed(LazyList(it))[::-1]), it) + self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7]) def test_LazyList_laziness(self): def test(ll, idx, val, cache): self.assertEqual(ll[idx], val) - self.assertEqual(getattr(ll, '_LazyList__cache'), list(cache)) + self.assertEqual(ll._cache, list(cache)) ll = LazyList(range(10)) test(ll, 0, 0, range(1)) test(ll, 5, 5, range(6)) test(ll, -3, 7, range(10)) - ll = LazyList(range(10)).reverse() + ll = LazyList(range(10), reverse=True) test(ll, -1, 0, range(1)) test(ll, 3, 6, range(10)) ll = LazyList(itertools.count()) test(ll, 10, 10, range(11)) - ll.reverse() + ll = reversed(ll) test(ll, -15, 14, range(15)) + def test_format_bytes(self): + self.assertEqual(format_bytes(0), '0.00B') + self.assertEqual(format_bytes(1000), '1000.00B') + self.assertEqual(format_bytes(1024), '1.00KiB') + self.assertEqual(format_bytes(1024**2), '1.00MiB') + self.assertEqual(format_bytes(1024**3), '1.00GiB') + self.assertEqual(format_bytes(1024**4), '1.00TiB') + self.assertEqual(format_bytes(1024**5), '1.00PiB') + self.assertEqual(format_bytes(1024**6), '1.00EiB') + self.assertEqual(format_bytes(1024**7), '1.00ZiB') + self.assertEqual(format_bytes(1024**8), '1.00YiB') + self.assertEqual(format_bytes(1024**9), '1024.00YiB') + + def test_hide_login_info(self): + self.assertEqual(Config.hide_login_info(['-u', 'foo', '-p', 'bar']), + ['-u', 'PRIVATE', '-p', 'PRIVATE']) + self.assertEqual(Config.hide_login_info(['-u']), ['-u']) + self.assertEqual(Config.hide_login_info(['-u', 'foo', '-u', 'bar']), + ['-u', 'PRIVATE', '-u', 'PRIVATE']) + self.assertEqual(Config.hide_login_info(['--username=foo']), + ['--username=PRIVATE']) + + def test_locked_file(self): + TEXT = 'test_locked_file\n' + FILE = 'test_locked_file.ytdl' + MODES = 'war' # Order is important + + try: + for lock_mode in MODES: + with locked_file(FILE, lock_mode, False) as f: + if lock_mode == 'r': + self.assertEqual(f.read(), TEXT * 2, 'Wrong file content') + else: + f.write(TEXT) + for test_mode in MODES: + testing_write = test_mode != 'r' + try: + with locked_file(FILE, test_mode, False): + pass + except (BlockingIOError, PermissionError): + if not testing_write: # FIXME: blocked read access + print(f'Known issue: Exclusive lock ({lock_mode}) blocks read access ({test_mode})') + continue + self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}') + else: + self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}') + finally: + with contextlib.suppress(OSError): + os.remove(FILE) + + def test_determine_file_encoding(self): + self.assertEqual(determine_file_encoding(b''), (None, 0)) + self.assertEqual(determine_file_encoding(b'--verbose -x --audio-format mkv\n'), (None, 0)) + + self.assertEqual(determine_file_encoding(b'\xef\xbb\xbf'), ('utf-8', 3)) + self.assertEqual(determine_file_encoding(b'\x00\x00\xfe\xff'), ('utf-32-be', 4)) + self.assertEqual(determine_file_encoding(b'\xff\xfe'), ('utf-16-le', 2)) + + self.assertEqual(determine_file_encoding(b'\xff\xfe# coding: utf-8\n--verbose'), ('utf-16-le', 2)) + + self.assertEqual(determine_file_encoding(b'# coding: utf-8\n--verbose'), ('utf-8', 0)) + self.assertEqual(determine_file_encoding(b'# coding: someencodinghere-12345\n--verbose'), ('someencodinghere-12345', 0)) + + self.assertEqual(determine_file_encoding(b'#coding:utf-8\n--verbose'), ('utf-8', 0)) + self.assertEqual(determine_file_encoding(b'# coding: utf-8 \r\n--verbose'), ('utf-8', 0)) + + self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0)) + self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0)) + + def test_get_compatible_ext(self): + self.assertEqual(get_compatible_ext( + vcodecs=[None], acodecs=[None, None], vexts=['mp4'], aexts=['m4a', 'm4a']), 'mkv') + self.assertEqual(get_compatible_ext( + vcodecs=[None], acodecs=[None], vexts=['flv'], aexts=['flv']), 'flv') + + self.assertEqual(get_compatible_ext( + vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['m4a']), 'mp4') + self.assertEqual(get_compatible_ext( + vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['webm']), 'mkv') + self.assertEqual(get_compatible_ext( + vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv') + self.assertEqual(get_compatible_ext( + vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm') + self.assertEqual(get_compatible_ext( + vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['weba']), 'webm') + + self.assertEqual(get_compatible_ext( + vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4') + self.assertEqual(get_compatible_ext( + vcodecs=['av01.0.12M.08'], acodecs=['opus'], vexts=['mp4'], aexts=['webm']), 'webm') + + self.assertEqual(get_compatible_ext( + vcodecs=['vp9'], acodecs=['opus'], vexts=['webm'], aexts=['webm'], preferences=['flv', 'mp4']), 'mp4') + self.assertEqual(get_compatible_ext( + vcodecs=['av1'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'], preferences=('webm', 'mkv')), 'mkv') + + def test_try_call(self): + def total(*x, **kwargs): + return sum(x) + sum(kwargs.values()) + + self.assertEqual(try_call(None), None, + msg='not a fn should give None') + self.assertEqual(try_call(lambda: 1), 1, + msg='int fn with no expected_type should give int') + self.assertEqual(try_call(lambda: 1, expected_type=int), 1, + msg='int fn with expected_type int should give int') + self.assertEqual(try_call(lambda: 1, expected_type=dict), None, + msg='int fn with wrong expected_type should give None') + self.assertEqual(try_call(total, args=(0, 1, 0), expected_type=int), 1, + msg='fn should accept arglist') + self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1, + msg='fn should accept kwargs') + self.assertEqual(try_call(lambda: 1, expected_type=dict), None, + msg='int fn with no expected_type should give None') + self.assertEqual(try_call(lambda x: {}, total, args=(42, ), expected_type=int), 42, + msg='expect first int result with expected_type int') + + def test_variadic(self): + self.assertEqual(variadic(None), (None, )) + self.assertEqual(variadic('spam'), ('spam', )) + self.assertEqual(variadic('spam', allowed_types=dict), 'spam') + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam') + + def test_http_header_dict(self): + headers = HTTPHeaderDict() + headers['ytdl-test'] = b'0' + self.assertEqual(list(headers.items()), [('Ytdl-Test', '0')]) + headers['ytdl-test'] = 1 + self.assertEqual(list(headers.items()), [('Ytdl-Test', '1')]) + headers['Ytdl-test'] = '2' + self.assertEqual(list(headers.items()), [('Ytdl-Test', '2')]) + self.assertTrue('ytDl-Test' in headers) + self.assertEqual(str(headers), str(dict(headers))) + self.assertEqual(repr(headers), str(dict(headers))) + + headers.update({'X-dlp': 'data'}) + self.assertEqual(set(headers.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data')}) + self.assertEqual(dict(headers), {'Ytdl-Test': '2', 'X-Dlp': 'data'}) + self.assertEqual(len(headers), 2) + self.assertEqual(headers.copy(), headers) + headers2 = HTTPHeaderDict({'X-dlp': 'data3'}, **headers, **{'X-dlp': 'data2'}) + self.assertEqual(set(headers2.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data2')}) + self.assertEqual(len(headers2), 2) + headers2.clear() + self.assertEqual(len(headers2), 0) + + # ensure we prefer latter headers + headers3 = HTTPHeaderDict({'Ytdl-TeSt': 1}, {'Ytdl-test': 2}) + self.assertEqual(set(headers3.items()), {('Ytdl-Test', '2')}) + del headers3['ytdl-tesT'] + self.assertEqual(dict(headers3), {}) + + headers4 = HTTPHeaderDict({'ytdl-test': 'data;'}) + self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')}) + + # common mistake: strip whitespace from values + # https://github.com/yt-dlp/yt-dlp/issues/8729 + headers5 = HTTPHeaderDict({'ytdl-test': ' data; '}) + self.assertEqual(set(headers5.items()), {('Ytdl-Test', 'data;')}) + + def test_extract_basic_auth(self): + assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None) + assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None) + assert extract_basic_auth('http://@foo.bar') == ('http://foo.bar', 'Basic Og==') + assert extract_basic_auth('http://:pass@foo.bar') == ('http://foo.bar', 'Basic OnBhc3M=') + assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=') + assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz') + + @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows') + def test_windows_escaping(self): + tests = [ + 'test"&', + '%CMDCMDLINE:~-1%&', + 'a\nb', + '"', + '\\', + '!', + '^!', + 'a \\ b', + 'a \\" b', + 'a \\ b\\', + # We replace \r with \n + ('a\r\ra', 'a\n\na'), + ] + + def run_shell(args): + stdout, stderr, error = Popen.run( + args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + assert not stderr + assert not error + return stdout + + for argument in tests: + if isinstance(argument, str): + expected = argument + else: + argument, expected = argument + + args = [sys.executable, '-c', 'import sys; print(end=sys.argv[1])', argument, 'end'] + assert run_shell(args) == expected + assert run_shell(shell_quote(args, shell=True)) == expected + if __name__ == '__main__': unittest.main()