X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/f5ea47488a2c59b2520b4988b7eab4d8830e3077..61edf57f8f13f6dfd81154174e647eb5fdd26089:/test/test_utils.py?ds=sidebyside
diff --git a/test/test_utils.py b/test/test_utils.py
index 948d5d059..251739686 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -4,6 +4,8 @@
import os
import sys
import unittest
+import warnings
+import datetime as dt
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -12,6 +14,7 @@
import io
import itertools
import json
+import subprocess
import xml.etree.ElementTree
from yt_dlp.compat import (
@@ -25,7 +28,9 @@
ExtractorError,
InAdvancePagedList,
LazyList,
+ NO_DEFAULT,
OnDemandPagedList,
+ Popen,
age_restricted,
args_to_str,
base_url,
@@ -39,19 +44,19 @@
datetime_from_str,
detect_exe_version,
determine_ext,
+ determine_file_encoding,
dfxp2srt,
- dict_get,
encode_base_n,
encode_compat_str,
encodeFilename,
- escape_rfc3986,
- escape_url,
expand_path,
extract_attributes,
+ extract_basic_auth,
find_xpath_attr,
fix_xml_ampersands,
float_or_none,
format_bytes,
+ get_compatible_ext,
get_element_by_attribute,
get_element_by_class,
get_element_html_by_attribute,
@@ -99,7 +104,6 @@
sanitize_filename,
sanitize_path,
sanitize_url,
- sanitized_Request,
shell_quote,
smuggle_url,
str_to_int,
@@ -107,6 +111,7 @@
strip_or_none,
subtitles_filename,
timeconvert,
+ try_call,
unescapeHTML,
unified_strdate,
unified_timestamp,
@@ -118,12 +123,19 @@
urlencode_postdata,
urljoin,
urshift,
+ variadic,
version_tuple,
xpath_attr,
xpath_element,
xpath_text,
xpath_with_ns,
)
+from yt_dlp.utils.networking import (
+ HTTPHeaderDict,
+ escape_rfc3986,
+ normalize_url,
+ remove_dot_segments,
+)
class TestUtil(unittest.TestCase):
@@ -138,13 +150,13 @@ def test_sanitize_filename(self):
self.assertEqual(sanitize_filename('123'), '123')
- self.assertEqual('abc_de', sanitize_filename('abc/de'))
+ self.assertEqual('abc⧸de', sanitize_filename('abc/de'))
self.assertFalse('/' in sanitize_filename('abc/de///'))
- self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de'))
- self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|'))
- self.assertEqual('yes no', sanitize_filename('yes? no'))
- self.assertEqual('this - that', sanitize_filename('this: that'))
+ self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', is_id=False))
+ self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', is_id=False))
+ self.assertEqual('yes no', sanitize_filename('yes? no', is_id=False))
+ self.assertEqual('this - that', sanitize_filename('this: that', is_id=False))
self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
aumlaut = 'ä'
@@ -250,15 +262,6 @@ def test_sanitize_url(self):
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
self.assertEqual(sanitize_url('foo bar'), 'foo bar')
- def test_extract_basic_auth(self):
- auth_header = lambda url: sanitized_Request(url).get_header('Authorization')
- self.assertFalse(auth_header('http://foo.bar'))
- self.assertFalse(auth_header('http://:foo.bar'))
- self.assertEqual(auth_header('http://@foo.bar'), 'Basic Og==')
- self.assertEqual(auth_header('http://:pass@foo.bar'), 'Basic OnBhc3M=')
- self.assertEqual(auth_header('http://user:@foo.bar'), 'Basic dXNlcjo=')
- self.assertEqual(auth_header('http://user:pass@foo.bar'), 'Basic dXNlcjpwYXNz')
-
def test_expand_path(self):
def env(var):
return f'%{var}%' if sys.platform == 'win32' else f'${var}'
@@ -273,8 +276,8 @@ def env(var):
self.assertEqual(expand_path(env('HOME')), os.getenv('HOME'))
self.assertEqual(expand_path('~'), os.getenv('HOME'))
self.assertEqual(
- expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')),
- '%s/expanded' % os.getenv('HOME'))
+ expand_path('~/{}'.format(env('yt_dlp_EXPATH_PATH'))),
+ '{}/expanded'.format(os.getenv('HOME')))
finally:
os.environ['HOME'] = old_home or ''
@@ -353,12 +356,12 @@ def test_datetime_from_str(self):
self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto'))
def test_daterange(self):
- _20century = DateRange("19000101", "20000101")
- self.assertFalse("17890714" in _20century)
- _ac = DateRange("00010101")
- self.assertTrue("19690721" in _ac)
- _firstmilenium = DateRange(end="10000101")
- self.assertTrue("07110427" in _firstmilenium)
+ _20century = DateRange('19000101', '20000101')
+ self.assertFalse('17890714' in _20century)
+ _ac = DateRange('00010101')
+ self.assertTrue('19690721' in _ac)
+ _firstmilenium = DateRange(end='10000101')
+ self.assertTrue('07110427' in _firstmilenium)
def test_unified_dates(self):
self.assertEqual(unified_strdate('December 21, 2010'), '20101221')
@@ -367,6 +370,7 @@ def test_unified_dates(self):
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
self.assertEqual(unified_strdate('1968 12 10'), '19681210')
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
+ self.assertEqual(unified_strdate('31-07-2022 20:00'), '20220731')
self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
self.assertEqual(
unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
@@ -410,6 +414,10 @@ def test_unified_timestamps(self):
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
+ self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
+ self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
+ self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)
+
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
@@ -498,7 +506,7 @@ def test_xpath_attr(self):
self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True)
def test_smuggle_url(self):
- data = {"ö": "ö", "abc": [3]}
+ data = {'ö': 'ö', 'abc': [3]}
url = 'https://foo.bar/baz?x=y#a'
smug_url = smuggle_url(url, data)
unsmug_url, unsmug_data = unsmuggle_url(smug_url)
@@ -559,6 +567,7 @@ def test_base_url(self):
self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
+ self.assertEqual(base_url('http://foo.de/bar/baz&x=z&w=y/x/c'), 'http://foo.de/bar/baz&x=z&w=y/x/')
def test_urljoin(self):
self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
@@ -649,6 +658,8 @@ def test_parse_duration(self):
self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
self.assertEqual(parse_duration('01:02:03:050'), 3723.05)
self.assertEqual(parse_duration('103:050'), 103.05)
+ self.assertEqual(parse_duration('1HR 3MIN'), 3780)
+ self.assertEqual(parse_duration('2hrs 3mins'), 7380)
def test_fix_xml_ampersands(self):
self.assertEqual(
@@ -742,28 +753,6 @@ def test_multipart_encode(self):
self.assertRaises(
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
- def test_dict_get(self):
- FALSE_VALUES = {
- 'none': None,
- 'false': False,
- 'zero': 0,
- 'empty_string': '',
- 'empty_list': [],
- }
- d = FALSE_VALUES.copy()
- d['a'] = 42
- self.assertEqual(dict_get(d, 'a'), 42)
- self.assertEqual(dict_get(d, 'b'), None)
- self.assertEqual(dict_get(d, 'b', 42), 42)
- self.assertEqual(dict_get(d, ('a', )), 42)
- self.assertEqual(dict_get(d, ('b', 'a', )), 42)
- self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
- self.assertEqual(dict_get(d, ('b', 'c', )), None)
- self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
- for key, false_value in FALSE_VALUES.items():
- self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
- self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
-
def test_merge_dicts(self):
self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
@@ -781,6 +770,11 @@ def test_encode_compat_str(self):
def test_parse_iso8601(self):
self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00'), 1395641066)
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=dt.timedelta(hours=-7)), 1395641066)
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=NO_DEFAULT), None)
+ # default does not override timezone in date_str
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00', timezone=dt.timedelta(hours=-10)), 1395641066)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266)
@@ -790,7 +784,7 @@ def test_parse_iso8601(self):
def test_strip_jsonp(self):
stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);')
d = json.loads(stripped)
- self.assertEqual(d, [{"id": "532cb", "x": 3}])
+ self.assertEqual(d, [{'id': '532cb', 'x': 3}])
stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc')
d = json.loads(stripped)
@@ -925,24 +919,124 @@ def test_escape_rfc3986(self):
self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar')
self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar')
- def test_escape_url(self):
+ def test_normalize_url(self):
self.assertEqual(
- escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
- 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4'
+ normalize_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
+ 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4',
)
self.assertEqual(
- escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
- 'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290'
+ normalize_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
+ 'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290',
)
self.assertEqual(
- escape_url('http://тест.рф/фрагмент'),
- 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
+ normalize_url('http://тест.рф/фрагмент'),
+ 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82',
)
self.assertEqual(
- escape_url('http://тест.рф/абв?абв=абв#абв'),
- 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
+ normalize_url('http://тест.рф/абв?абв=абв#абв'),
+ 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2',
+ )
+ self.assertEqual(normalize_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
+
+ self.assertEqual(normalize_url('http://www.example.com/../a/b/../c/./d.html'), 'http://www.example.com/a/c/d.html')
+
+ def test_remove_dot_segments(self):
+ self.assertEqual(remove_dot_segments('/a/b/c/./../../g'), '/a/g')
+ self.assertEqual(remove_dot_segments('mid/content=5/../6'), 'mid/6')
+ self.assertEqual(remove_dot_segments('/ad/../cd'), '/cd')
+ self.assertEqual(remove_dot_segments('/ad/../cd/'), '/cd/')
+ self.assertEqual(remove_dot_segments('/..'), '/')
+ self.assertEqual(remove_dot_segments('/./'), '/')
+ self.assertEqual(remove_dot_segments('/./a'), '/a')
+ self.assertEqual(remove_dot_segments('/abc/./.././d/././e/.././f/./../../ghi'), '/ghi')
+ self.assertEqual(remove_dot_segments('/'), '/')
+ self.assertEqual(remove_dot_segments('/t'), '/t')
+ self.assertEqual(remove_dot_segments('t'), 't')
+ self.assertEqual(remove_dot_segments(''), '')
+ self.assertEqual(remove_dot_segments('/../a/b/c'), '/a/b/c')
+ self.assertEqual(remove_dot_segments('../a'), 'a')
+ self.assertEqual(remove_dot_segments('./a'), 'a')
+ self.assertEqual(remove_dot_segments('.'), '')
+ self.assertEqual(remove_dot_segments('////'), '////')
+
+ def test_js_to_json_vars_strings(self):
+ self.assertDictEqual(
+ json.loads(js_to_json(
+ '''{
+ 'null': a,
+ 'nullStr': b,
+ 'true': c,
+ 'trueStr': d,
+ 'false': e,
+ 'falseStr': f,
+ 'unresolvedVar': g,
+ }''',
+ {
+ 'a': 'null',
+ 'b': '"null"',
+ 'c': 'true',
+ 'd': '"true"',
+ 'e': 'false',
+ 'f': '"false"',
+ 'g': 'var',
+ },
+ )),
+ {
+ 'null': None,
+ 'nullStr': 'null',
+ 'true': True,
+ 'trueStr': 'true',
+ 'false': False,
+ 'falseStr': 'false',
+ 'unresolvedVar': 'var',
+ },
+ )
+
+ self.assertDictEqual(
+ json.loads(js_to_json(
+ '''{
+ 'int': a,
+ 'intStr': b,
+ 'float': c,
+ 'floatStr': d,
+ }''',
+ {
+ 'a': '123',
+ 'b': '"123"',
+ 'c': '1.23',
+ 'd': '"1.23"',
+ },
+ )),
+ {
+ 'int': 123,
+ 'intStr': '123',
+ 'float': 1.23,
+ 'floatStr': '1.23',
+ },
+ )
+
+ self.assertDictEqual(
+ json.loads(js_to_json(
+ '''{
+ 'object': a,
+ 'objectStr': b,
+ 'array': c,
+ 'arrayStr': d,
+ }''',
+ {
+ 'a': '{}',
+ 'b': '"{}"',
+ 'c': '[]',
+ 'd': '"[]"',
+ },
+ )),
+ {
+ 'object': {},
+ 'objectStr': '{}',
+ 'array': [],
+ 'arrayStr': '[]',
+ },
)
- self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
def test_js_to_json_realworld(self):
inp = '''{
@@ -987,7 +1081,7 @@ def test_js_to_json_realworld(self):
def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
- self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
+ self.assertEqual(json.loads(on), {'abc_def': "1'\\2\\'3\"4"})
on = js_to_json('{"abc": true}')
self.assertEqual(json.loads(on), {'abc': True})
@@ -1019,9 +1113,9 @@ def test_js_to_json_edgecases(self):
'c': 0,
'd': 42.42,
'e': [],
- 'f': "abc",
- 'g': "",
- '42': 42
+ 'f': 'abc',
+ 'g': '',
+ '42': 42,
})
on = js_to_json('["abc", "def",]')
@@ -1090,10 +1184,34 @@ def test_js_to_json_edgecases(self):
on = js_to_json('[1,//{},\n2]')
self.assertEqual(json.loads(on), [1, 2])
+ on = js_to_json(R'"\^\$\#"')
+ self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped')
+
+ on = js_to_json('\'"\\""\'')
+ self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')
+
+ on = js_to_json('[new Date("spam"), \'("eggs")\']')
+ self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should match a single string')
+
def test_js_to_json_malformed(self):
self.assertEqual(js_to_json('42a1'), '42"a1"')
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
+ def test_js_to_json_template_literal(self):
+ self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"')
+ self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"')
+ self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"')
+ self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
+ self.assertEqual(js_to_json('`${name}`', {}), '"name"')
+
+ def test_js_to_json_common_constructors(self):
+ self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5})
+ self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10])
+ self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5])
+ self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5})
+ self.assertEqual(json.loads(js_to_json('new Date("123")')), '123')
+ self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), '2023-10-19')
+
def test_extract_attributes(self):
self.assertEqual(extract_attributes('