]> jfr.im git - yt-dlp.git/blobdiff - test/test_utils.py
Completely change project name to yt-dlp (#85)
[yt-dlp.git] / test / test_utils.py
index de841b1a07c618aa1c3c79e6602f87fed8d9c997..795d2b46a23d216bc8e22d8e5487e1f9065e5fa1 100644 (file)
 import json
 import xml.etree.ElementTree
 
-from youtube_dl.utils import (
+from yt_dlp.utils import (
     age_restricted,
     args_to_str,
     encode_base_n,
+    caesar,
     clean_html,
+    clean_podcast_url,
     date_from_str,
     DateRange,
     detect_exe_version,
     ExtractorError,
     find_xpath_attr,
     fix_xml_ampersands,
+    float_or_none,
     get_element_by_class,
     get_element_by_attribute,
     get_elements_by_class,
     get_elements_by_attribute,
     InAdvancePagedList,
+    int_or_none,
     intlist_to_bytes,
     is_html,
     js_to_json,
@@ -55,6 +59,7 @@
     parse_count,
     parse_iso8601,
     parse_resolution,
+    parse_bitrate,
     pkcs1pad,
     read_batch_urls,
     sanitize_filename,
     remove_start,
     remove_end,
     remove_quotes,
+    rot47,
     shell_quote,
     smuggle_url,
     str_to_int,
     strip_jsonp,
+    strip_or_none,
+    subtitles_filename,
     timeconvert,
     unescapeHTML,
     unified_strdate,
@@ -78,6 +86,7 @@
     uppercase_escape,
     lowercase_escape,
     url_basename,
+    url_or_none,
     base_url,
     urljoin,
     urlencode_postdata,
     cli_valueless_option,
     cli_bool_option,
     parse_codecs,
+    iri_to_uri,
 )
-from youtube_dl.compat import (
+from yt_dlp.compat import (
     compat_chr,
     compat_etree_fromstring,
     compat_getenv,
@@ -179,7 +189,7 @@ def test_sanitize_filename_restricted(self):
 
         self.assertEqual(sanitize_filename(
             'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
-            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
+            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy')
 
     def test_sanitize_ids(self):
         self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
@@ -232,12 +242,12 @@ def test_expand_path(self):
         def env(var):
             return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
 
-        compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded')
-        self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded')
+        compat_setenv('yt_dlp_EXPATH_PATH', 'expanded')
+        self.assertEqual(expand_path(env('yt_dlp_EXPATH_PATH')), 'expanded')
         self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
         self.assertEqual(expand_path('~'), compat_getenv('HOME'))
         self.assertEqual(
-            expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
+            expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')),
             '%s/expanded' % compat_getenv('HOME'))
 
     def test_prepend_extension(self):
@@ -256,6 +266,11 @@ def test_replace_extension(self):
         self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
         self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
 
+    def test_subtitles_filename(self):
+        self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
+        self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
+        self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt')
+
     def test_remove_start(self):
         self.assertEqual(remove_start(None, 'A - '), None)
         self.assertEqual(remove_start('A - B', 'A - '), 'B')
@@ -329,6 +344,8 @@ def test_unified_dates(self):
         self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
         self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
         self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
+        self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103')
+        self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023')
 
     def test_unified_timestamps(self):
         self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
@@ -466,9 +483,30 @@ def test_shell_quote(self):
             shell_quote(args),
             """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
 
+    def test_float_or_none(self):
+        self.assertEqual(float_or_none('42.42'), 42.42)
+        self.assertEqual(float_or_none('42'), 42.0)
+        self.assertEqual(float_or_none(''), None)
+        self.assertEqual(float_or_none(None), None)
+        self.assertEqual(float_or_none([]), None)
+        self.assertEqual(float_or_none(set()), None)
+
+    def test_int_or_none(self):
+        self.assertEqual(int_or_none('42'), 42)
+        self.assertEqual(int_or_none(''), None)
+        self.assertEqual(int_or_none(None), None)
+        self.assertEqual(int_or_none([]), None)
+        self.assertEqual(int_or_none(set()), None)
+
     def test_str_to_int(self):
         self.assertEqual(str_to_int('123,456'), 123456)
         self.assertEqual(str_to_int('123.456'), 123456)
+        self.assertEqual(str_to_int(523), 523)
+        # Python 3 has no long
+        if sys.version_info < (3, 0):
+            eval('self.assertEqual(str_to_int(123456L), 123456)')
+        self.assertEqual(str_to_int('noninteger'), None)
+        self.assertEqual(str_to_int([]), None)
 
     def test_url_basename(self):
         self.assertEqual(url_basename('http://foo.de/'), '')
@@ -506,6 +544,23 @@ def test_urljoin(self):
         self.assertEqual(urljoin('http://foo.de/', ''), None)
         self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
         self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
+        self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
+        self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
+
+    def test_url_or_none(self):
+        self.assertEqual(url_or_none(None), None)
+        self.assertEqual(url_or_none(''), None)
+        self.assertEqual(url_or_none('foo'), None)
+        self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
+        self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
+        self.assertEqual(url_or_none('http$://foo.de'), None)
+        self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
+        self.assertEqual(url_or_none('//foo.de'), '//foo.de')
+        self.assertEqual(url_or_none('s3://foo.de'), None)
+        self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de')
+        self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
+        self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
+        self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
 
     def test_parse_age_limit(self):
         self.assertEqual(parse_age_limit(None), None)
@@ -721,6 +776,18 @@ def test_strip_jsonp(self):
         d = json.loads(stripped)
         self.assertEqual(d, {'status': 'success'})
 
+    def test_strip_or_none(self):
+        self.assertEqual(strip_or_none(' abc'), 'abc')
+        self.assertEqual(strip_or_none('abc '), 'abc')
+        self.assertEqual(strip_or_none(' abc '), 'abc')
+        self.assertEqual(strip_or_none('\tabc\t'), 'abc')
+        self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc')
+        self.assertEqual(strip_or_none('abc'), 'abc')
+        self.assertEqual(strip_or_none(''), '')
+        self.assertEqual(strip_or_none(None), None)
+        self.assertEqual(strip_or_none(42), None)
+        self.assertEqual(strip_or_none([]), None)
+
     def test_uppercase_escape(self):
         self.assertEqual(uppercase_escape('aä'), 'aä')
         self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
@@ -743,6 +810,8 @@ def test_mimetype2ext(self):
         self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
         self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
         self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
+        self.assertEqual(mimetype2ext('audio/x-wav'), 'wav')
+        self.assertEqual(mimetype2ext('audio/x-wav;codec=pcm'), 'wav')
 
     def test_month_by_name(self):
         self.assertEqual(month_by_name(None), None)
@@ -774,6 +843,19 @@ def test_parse_codecs(self):
             'vcodec': 'h264',
             'acodec': 'aac',
         })
+        self.assertEqual(parse_codecs('av01.0.05M.08'), {
+            'vcodec': 'av01.0.05M.08',
+            'acodec': 'none',
+        })
+        self.assertEqual(parse_codecs('theora, vorbis'), {
+            'vcodec': 'theora',
+            'acodec': 'vorbis',
+        })
+        self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), {
+            'vcodec': 'unknownvcodec',
+            'acodec': 'unknownacodec',
+        })
+        self.assertEqual(parse_codecs('unknown'), {})
 
     def test_escape_rfc3986(self):
         reserved = "!*'();:@&=+$,/?#[]"
@@ -862,6 +944,28 @@ def test_js_to_json_edgecases(self):
         self.assertEqual(d['x'], 1)
         self.assertEqual(d['y'], 'a')
 
+        # Just drop ! prefix for now though this results in a wrong value
+        on = js_to_json('''{
+            a: !0,
+            b: !1,
+            c: !!0,
+            d: !!42.42,
+            e: !!![],
+            f: !"abc",
+            g: !"",
+            !42: 42
+        }''')
+        self.assertEqual(json.loads(on), {
+            'a': 0,
+            'b': 1,
+            'c': 0,
+            'd': 42.42,
+            'e': [],
+            'f': "abc",
+            'g': "",
+            '42': 42
+        })
+
         on = js_to_json('["abc", "def",]')
         self.assertEqual(json.loads(on), ['abc', 'def'])
 
@@ -919,6 +1023,12 @@ def test_js_to_json_edgecases(self):
         on = js_to_json('{42:4.2e1}')
         self.assertEqual(json.loads(on), {'42': 42.0})
 
+        on = js_to_json('{ "0x40": "0x40" }')
+        self.assertEqual(json.loads(on), {'0x40': '0x40'})
+
+        on = js_to_json('{ "040": "040" }')
+        self.assertEqual(json.loads(on), {'040': '040'})
+
     def test_js_to_json_malformed(self):
         self.assertEqual(js_to_json('42a1'), '42"a1"')
         self.assertEqual(js_to_json('42a-1'), '42"a"-1')
@@ -1013,6 +1123,13 @@ def test_parse_resolution(self):
         self.assertEqual(parse_resolution('4k'), {'height': 2160})
         self.assertEqual(parse_resolution('8K'), {'height': 4320})
 
+    def test_parse_bitrate(self):
+        self.assertEqual(parse_bitrate(None), None)
+        self.assertEqual(parse_bitrate(''), None)
+        self.assertEqual(parse_bitrate('300kbps'), 300)
+        self.assertEqual(parse_bitrate('1500kbps'), 1500)
+        self.assertEqual(parse_bitrate('300 kbps'), 300)
+
     def test_version_tuple(self):
         self.assertEqual(version_tuple('1'), (1,))
         self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
@@ -1297,6 +1414,20 @@ def test_encode_base_n(self):
         self.assertRaises(ValueError, encode_base_n, 0, 70)
         self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
 
+    def test_caesar(self):
+        self.assertEqual(caesar('ace', 'abcdef', 2), 'cea')
+        self.assertEqual(caesar('cea', 'abcdef', -2), 'ace')
+        self.assertEqual(caesar('ace', 'abcdef', -2), 'eac')
+        self.assertEqual(caesar('eac', 'abcdef', 2), 'ace')
+        self.assertEqual(caesar('ace', 'abcdef', 0), 'ace')
+        self.assertEqual(caesar('xyz', 'abcdef', 2), 'xyz')
+        self.assertEqual(caesar('abc', 'acegik', 2), 'ebg')
+        self.assertEqual(caesar('ebg', 'acegik', -2), 'abc')
+
+    def test_rot47(self):
+        self.assertEqual(rot47('yt-dlp'), r'JE\5=A')
+        self.assertEqual(rot47('YT-DLP'), r'*%\s{!')
+
     def test_urshift(self):
         self.assertEqual(urshift(3, 1), 1)
         self.assertEqual(urshift(-3, 1), 2147483646)
@@ -1341,6 +1472,36 @@ def test_get_elements_by_attribute(self):
         self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
         self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
 
+    def test_iri_to_uri(self):
+        self.assertEqual(
+            iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'),
+            'https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b')  # Same
+        self.assertEqual(
+            iri_to_uri('https://www.google.com/search?q=Käsesoßenrührlöffel'),  # German for cheese sauce stirring spoon
+            'https://www.google.com/search?q=K%C3%A4seso%C3%9Fenr%C3%BChrl%C3%B6ffel')
+        self.assertEqual(
+            iri_to_uri('https://www.google.com/search?q=lt<+gt>+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#'),
+            'https://www.google.com/search?q=lt%3C+gt%3E+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#')
+        self.assertEqual(
+            iri_to_uri('http://правозащита38.рф/category/news/'),
+            'http://xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/')
+        self.assertEqual(
+            iri_to_uri('http://www.правозащита38.рф/category/news/'),
+            'http://www.xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/')
+        self.assertEqual(
+            iri_to_uri('https://i❤.ws/emojidomain/👍👏🤝💪'),
+            'https://xn--i-7iq.ws/emojidomain/%F0%9F%91%8D%F0%9F%91%8F%F0%9F%A4%9D%F0%9F%92%AA')
+        self.assertEqual(
+            iri_to_uri('http://日本語.jp/'),
+            'http://xn--wgv71a119e.jp/')
+        self.assertEqual(
+            iri_to_uri('http://导航.中国/'),
+            'http://xn--fet810g.xn--fiqs8s/')
+
+    def test_clean_podcast_url(self):
+        self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
+        self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
+
 
 if __name__ == '__main__':
     unittest.main()