[youtube, cleanup] Minor refactoring

[yt-dlp.git] / test / test_utils.py
diff --git a/test/test_utils.py b/test/test_utils.py

index 3f45b0bd1fa56c908cb1870343481ffe31e87fb9..1f826c2f222d3ddd87add3e6a9d894b2f1518499 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
  # coding: utf-8
  
  from __future__ import unicode_literals
@@ -12,15 +12,20 @@
  
  # Various small unit tests
  import io
+import itertools
  import json
  import xml.etree.ElementTree
  
-from youtube_dl.utils import (
+from yt_dlp.utils import (
      age_restricted,
      args_to_str,
      encode_base_n,
+    caesar,
      clean_html,
+    clean_podcast_url,
+    Config,
      date_from_str,
+    datetime_from_str,
      DateRange,
      detect_exe_version,
      determine_ext,
@@ -33,14 +38,29 @@
      ExtractorError,
      find_xpath_attr,
      fix_xml_ampersands,
+    format_bytes,
+    float_or_none,
      get_element_by_class,
+    get_element_by_attribute,
+    get_elements_by_class,
+    get_elements_by_attribute,
+    get_element_html_by_class,
+    get_element_html_by_attribute,
+    get_elements_html_by_class,
+    get_elements_html_by_attribute,
+    get_elements_text_and_html_by_attribute,
+    get_element_text_and_html_by_tag,
      InAdvancePagedList,
+    int_or_none,
      intlist_to_bytes,
      is_html,
      js_to_json,
      limit_length,
+    locked_file,
+    merge_dicts,
      mimetype2ext,
      month_by_name,
+    multipart_encode,
      ohdave_rsa_encrypt,
      OnDemandPagedList,
      orderedSet,
@@ -49,18 +69,28 @@
      parse_filesize,
      parse_count,
      parse_iso8601,
+    parse_resolution,
+    parse_bitrate,
+    parse_qs,
+    pkcs1pad,
      read_batch_urls,
      sanitize_filename,
      sanitize_path,
+    sanitize_url,
+    sanitized_Request,
+    expand_path,
      prepend_extension,
      replace_extension,
      remove_start,
      remove_end,
      remove_quotes,
+    rot47,
      shell_quote,
      smuggle_url,
      str_to_int,
      strip_jsonp,
+    strip_or_none,
+    subtitles_filename,
      timeconvert,
      unescapeHTML,
      unified_strdate,
@@ -69,6 +99,7 @@
      uppercase_escape,
      lowercase_escape,
      url_basename,
+    url_or_none,
      base_url,
      urljoin,
      urlencode_postdata,
@@ -87,12 +118,16 @@
      cli_valueless_option,
      cli_bool_option,
      parse_codecs,
+    iri_to_uri,
+    LazyList,
  )
-from youtube_dl.compat import (
+from yt_dlp.compat import (
      compat_chr,
      compat_etree_fromstring,
-    compat_urlparse,
-    compat_parse_qs,
+    compat_getenv,
+    compat_HTMLParseError,
+    compat_os_name,
+    compat_setenv,
  )
  
  
@@ -102,6 +137,7 @@ def test_timeconvert(self):
          self.assertTrue(timeconvert('bougrg') is None)
  
      def test_sanitize_filename(self):
+        self.assertEqual(sanitize_filename(''), '')
          self.assertEqual(sanitize_filename('abc'), 'abc')
          self.assertEqual(sanitize_filename('abc_d-e'), 'abc_d-e')
  
@@ -125,10 +161,12 @@ def test_sanitize_filename(self):
              sanitize_filename('New World record at 0:12:34'),
              'New World record at 0_12_34')
  
-        self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
+        self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf')
          self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
-        self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf')
+        self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf')
+        self.assertEqual(sanitize_filename('.gasdgf'), '.gasdgf')
          self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
+        self.assertEqual(sanitize_filename('.gasdgf', is_id=False), 'gasdgf')
  
          forbidden = '"\0\\/'
          for fc in forbidden:
@@ -167,7 +205,7 @@ def test_sanitize_filename_restricted(self):
  
          self.assertEqual(sanitize_filename(
              'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
-            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
+            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy')
  
      def test_sanitize_ids(self):
          self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
@@ -210,6 +248,34 @@ def test_sanitize_path(self):
          self.assertEqual(sanitize_path('./abc'), 'abc')
          self.assertEqual(sanitize_path('./../abc'), '..\\abc')
  
+    def test_sanitize_url(self):
+        self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
+        self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
+        self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
+        self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
+        self.assertEqual(sanitize_url('foo bar'), 'foo bar')
+
+    def test_extract_basic_auth(self):
+        auth_header = lambda url: sanitized_Request(url).get_header('Authorization')
+        self.assertFalse(auth_header('http://foo.bar'))
+        self.assertFalse(auth_header('http://:foo.bar'))
+        self.assertEqual(auth_header('http://@foo.bar'), 'Basic Og==')
+        self.assertEqual(auth_header('http://:pass@foo.bar'), 'Basic OnBhc3M=')
+        self.assertEqual(auth_header('http://user:@foo.bar'), 'Basic dXNlcjo=')
+        self.assertEqual(auth_header('http://user:pass@foo.bar'), 'Basic dXNlcjpwYXNz')
+
+    def test_expand_path(self):
+        def env(var):
+            return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
+
+        compat_setenv('yt_dlp_EXPATH_PATH', 'expanded')
+        self.assertEqual(expand_path(env('yt_dlp_EXPATH_PATH')), 'expanded')
+        self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
+        self.assertEqual(expand_path('~'), compat_getenv('HOME'))
+        self.assertEqual(
+            expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')),
+            '%s/expanded' % compat_getenv('HOME'))
+
      def test_prepend_extension(self):
          self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
          self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
@@ -226,6 +292,11 @@ def test_replace_extension(self):
          self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
          self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
  
+    def test_subtitles_filename(self):
+        self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
+        self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
+        self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt')
+
      def test_remove_start(self):
          self.assertEqual(remove_start(None, 'A - '), None)
          self.assertEqual(remove_start('A - B', 'A - '), 'B')
@@ -258,6 +329,7 @@ def test_unescape_html(self):
          self.assertEqual(unescapeHTML('&#47;'), '/')
          self.assertEqual(unescapeHTML('&eacute;'), 'é')
          self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
+        self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
          # HTML5 entities
          self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
  
@@ -265,8 +337,18 @@ def test_date_from_str(self):
          self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
          self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week'))
          self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week'))
-        self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year'))
-        self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month'))
+        self.assertEqual(date_from_str('20200229+365day'), date_from_str('20200229+1year'))
+        self.assertEqual(date_from_str('20210131+28day'), date_from_str('20210131+1month'))
+
+    def test_datetime_from_str(self):
+        self.assertEqual(datetime_from_str('yesterday', precision='day'), datetime_from_str('now-1day', precision='auto'))
+        self.assertEqual(datetime_from_str('now+7day', precision='day'), datetime_from_str('now+1week', precision='auto'))
+        self.assertEqual(datetime_from_str('now+14day', precision='day'), datetime_from_str('now+2week', precision='auto'))
+        self.assertEqual(datetime_from_str('20200229+365day', precision='day'), datetime_from_str('20200229+1year', precision='auto'))
+        self.assertEqual(datetime_from_str('20210131+28day', precision='day'), datetime_from_str('20210131+1month', precision='auto'))
+        self.assertEqual(datetime_from_str('20210131+59day', precision='day'), datetime_from_str('20210131+2month', precision='auto'))
+        self.assertEqual(datetime_from_str('now+1day', precision='hour'), datetime_from_str('now+24hours', precision='auto'))
+        self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto'))
  
      def test_daterange(self):
          _20century = DateRange("19000101", "20000101")
@@ -295,6 +377,11 @@ def test_unified_dates(self):
          self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
          self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
          self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
+        self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
+        self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
+        self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
+        self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103')
+        self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023')
  
      def test_unified_timestamps(self):
          self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
@@ -316,6 +403,10 @@ def test_unified_timestamps(self):
          self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
          self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
          self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
+        self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
+        self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
+        self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
+        self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
  
      def test_determine_ext(self):
          self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
@@ -323,6 +414,7 @@ def test_determine_ext(self):
          self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
          self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
          self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
+        self.assertEqual(determine_ext('foobar', None), None)
  
      def test_find_xpath_attr(self):
          testxml = '''<root>
@@ -423,11 +515,34 @@ def test_smuggle_url(self):
  
      def test_shell_quote(self):
          args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
-        self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
+        self.assertEqual(
+            shell_quote(args),
+            """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
+
+    def test_float_or_none(self):
+        self.assertEqual(float_or_none('42.42'), 42.42)
+        self.assertEqual(float_or_none('42'), 42.0)
+        self.assertEqual(float_or_none(''), None)
+        self.assertEqual(float_or_none(None), None)
+        self.assertEqual(float_or_none([]), None)
+        self.assertEqual(float_or_none(set()), None)
+
+    def test_int_or_none(self):
+        self.assertEqual(int_or_none('42'), 42)
+        self.assertEqual(int_or_none(''), None)
+        self.assertEqual(int_or_none(None), None)
+        self.assertEqual(int_or_none([]), None)
+        self.assertEqual(int_or_none(set()), None)
  
      def test_str_to_int(self):
          self.assertEqual(str_to_int('123,456'), 123456)
          self.assertEqual(str_to_int('123.456'), 123456)
+        self.assertEqual(str_to_int(523), 523)
+        # Python 3 has no long
+        if sys.version_info < (3, 0):
+            eval('self.assertEqual(str_to_int(123456L), 123456)')
+        self.assertEqual(str_to_int('noninteger'), None)
+        self.assertEqual(str_to_int([]), None)
  
      def test_url_basename(self):
          self.assertEqual(url_basename('http://foo.de/'), '')
@@ -448,16 +563,40 @@ def test_base_url(self):
  
      def test_urljoin(self):
          self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt')
          self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
          self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
          self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
          self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
          self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin(None, '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
          self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
          self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
          self.assertEqual(urljoin('http://foo.de/', None), None)
          self.assertEqual(urljoin('http://foo.de/', ''), None)
          self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
+        self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
+        self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
+        self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
+
+    def test_url_or_none(self):
+        self.assertEqual(url_or_none(None), None)
+        self.assertEqual(url_or_none(''), None)
+        self.assertEqual(url_or_none('foo'), None)
+        self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
+        self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
+        self.assertEqual(url_or_none('http$://foo.de'), None)
+        self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
+        self.assertEqual(url_or_none('//foo.de'), '//foo.de')
+        self.assertEqual(url_or_none('s3://foo.de'), None)
+        self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de')
+        self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
+        self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
+        self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
  
      def test_parse_age_limit(self):
          self.assertEqual(parse_age_limit(None), None)
@@ -472,6 +611,8 @@ def test_parse_age_limit(self):
          self.assertEqual(parse_age_limit('PG-13'), 13)
          self.assertEqual(parse_age_limit('TV-14'), 14)
          self.assertEqual(parse_age_limit('TV-MA'), 17)
+        self.assertEqual(parse_age_limit('TV14'), 14)
+        self.assertEqual(parse_age_limit('TV_G'), 0)
  
      def test_parse_duration(self):
          self.assertEqual(parse_duration(None), None)
@@ -487,6 +628,8 @@ def test_parse_duration(self):
          self.assertEqual(parse_duration('3h 11m 53s'), 11513)
          self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513)
          self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513)
+        self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513)
+        self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513)
          self.assertEqual(parse_duration('62m45s'), 3765)
          self.assertEqual(parse_duration('6m59s'), 419)
          self.assertEqual(parse_duration('49s'), 49)
@@ -503,6 +646,10 @@ def test_parse_duration(self):
          self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
          self.assertEqual(parse_duration('87 Min.'), 5220)
          self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
+        self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
+        self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
+        self.assertEqual(parse_duration('01:02:03:050'), 3723.05)
+        self.assertEqual(parse_duration('103:050'), 103.05)
  
      def test_fix_xml_ampersands(self):
          self.assertEqual(
@@ -556,38 +703,46 @@ def test_urlencode_postdata(self):
          self.assertTrue(isinstance(data, bytes))
  
      def test_update_url_query(self):
-        def query_dict(url):
-            return compat_parse_qs(compat_urlparse.urlparse(url).query)
-        self.assertEqual(query_dict(update_url_query(
+        self.assertEqual(parse_qs(update_url_query(
              'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
-            query_dict('http://example.com/path?quality=HD&format=mp4'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?quality=HD&format=mp4'))
+        self.assertEqual(parse_qs(update_url_query(
              'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
-            query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?system=LINUX&system=WINDOWS'))
+        self.assertEqual(parse_qs(update_url_query(
              'http://example.com/path', {'fields': 'id,formats,subtitles'})),
-            query_dict('http://example.com/path?fields=id,formats,subtitles'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(parse_qs(update_url_query(
              'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
-            query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
+        self.assertEqual(parse_qs(update_url_query(
              'http://example.com/path?manifest=f4m', {'manifest': []})),
-            query_dict('http://example.com/path'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path'))
+        self.assertEqual(parse_qs(update_url_query(
              'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
-            query_dict('http://example.com/path?system=LINUX'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?system=LINUX'))
+        self.assertEqual(parse_qs(update_url_query(
              'http://example.com/path', {'fields': b'id,formats,subtitles'})),
-            query_dict('http://example.com/path?fields=id,formats,subtitles'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(parse_qs(update_url_query(
              'http://example.com/path', {'width': 1080, 'height': 720})),
-            query_dict('http://example.com/path?width=1080&height=720'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?width=1080&height=720'))
+        self.assertEqual(parse_qs(update_url_query(
              'http://example.com/path', {'bitrate': 5020.43})),
-            query_dict('http://example.com/path?bitrate=5020.43'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?bitrate=5020.43'))
+        self.assertEqual(parse_qs(update_url_query(
              'http://example.com/path', {'test': '第二行тест'})),
-            query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
+            parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
+
+    def test_multipart_encode(self):
+        self.assertEqual(
+            multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
+            b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
+        self.assertEqual(
+            multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0],
+            b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
+        self.assertRaises(
+            ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
  
      def test_dict_get(self):
          FALSE_VALUES = {
@@ -611,6 +766,17 @@ def test_dict_get(self):
              self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
              self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
  
+    def test_merge_dicts(self):
+        self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
+        self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
+        self.assertEqual(merge_dicts({'a': 1}, {'a': None}), {'a': 1})
+        self.assertEqual(merge_dicts({'a': 1}, {'a': ''}), {'a': 1})
+        self.assertEqual(merge_dicts({'a': 1}, {}), {'a': 1})
+        self.assertEqual(merge_dicts({'a': None}, {'a': 1}), {'a': 1})
+        self.assertEqual(merge_dicts({'a': ''}, {'a': 1}), {'a': ''})
+        self.assertEqual(merge_dicts({'a': ''}, {'a': 'abc'}), {'a': 'abc'})
+        self.assertEqual(merge_dicts({'a': None}, {'a': ''}, {'a': 'abc'}), {'a': 'abc'})
+
      def test_encode_compat_str(self):
          self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
          self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
@@ -636,6 +802,30 @@ def test_strip_jsonp(self):
          d = json.loads(stripped)
          self.assertEqual(d, {'status': 'success'})
  
+        stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
+        stripped = strip_jsonp('window.cb && cb({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
+        stripped = strip_jsonp('({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
+    def test_strip_or_none(self):
+        self.assertEqual(strip_or_none(' abc'), 'abc')
+        self.assertEqual(strip_or_none('abc '), 'abc')
+        self.assertEqual(strip_or_none(' abc '), 'abc')
+        self.assertEqual(strip_or_none('\tabc\t'), 'abc')
+        self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc')
+        self.assertEqual(strip_or_none('abc'), 'abc')
+        self.assertEqual(strip_or_none(''), '')
+        self.assertEqual(strip_or_none(None), None)
+        self.assertEqual(strip_or_none(42), None)
+        self.assertEqual(strip_or_none([]), None)
+
      def test_uppercase_escape(self):
          self.assertEqual(uppercase_escape('aä'), 'aä')
          self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
@@ -658,6 +848,8 @@ def test_mimetype2ext(self):
          self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
          self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
          self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
+        self.assertEqual(mimetype2ext('audio/x-wav'), 'wav')
+        self.assertEqual(mimetype2ext('audio/x-wav;codec=pcm'), 'wav')
  
      def test_month_by_name(self):
          self.assertEqual(month_by_name(None), None)
@@ -672,23 +864,58 @@ def test_parse_codecs(self):
          self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
              'vcodec': 'avc1.77.30',
              'acodec': 'mp4a.40.2',
+            'dynamic_range': None,
          })
          self.assertEqual(parse_codecs('mp4a.40.2'), {
              'vcodec': 'none',
              'acodec': 'mp4a.40.2',
+            'dynamic_range': None,
          })
          self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), {
              'vcodec': 'avc1.42001e',
              'acodec': 'mp4a.40.5',
+            'dynamic_range': None,
          })
          self.assertEqual(parse_codecs('avc3.640028'), {
              'vcodec': 'avc3.640028',
              'acodec': 'none',
+            'dynamic_range': None,
          })
          self.assertEqual(parse_codecs(', h264,,newcodec,aac'), {
              'vcodec': 'h264',
              'acodec': 'aac',
+            'dynamic_range': None,
          })
+        self.assertEqual(parse_codecs('av01.0.05M.08'), {
+            'vcodec': 'av01.0.05M.08',
+            'acodec': 'none',
+            'dynamic_range': None,
+        })
+        self.assertEqual(parse_codecs('vp9.2'), {
+            'vcodec': 'vp9.2',
+            'acodec': 'none',
+            'dynamic_range': 'HDR10',
+        })
+        self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), {
+            'vcodec': 'av01.0.12M.10',
+            'acodec': 'none',
+            'dynamic_range': 'HDR10',
+        })
+        self.assertEqual(parse_codecs('dvhe'), {
+            'vcodec': 'dvhe',
+            'acodec': 'none',
+            'dynamic_range': 'DV',
+        })
+        self.assertEqual(parse_codecs('theora, vorbis'), {
+            'vcodec': 'theora',
+            'acodec': 'vorbis',
+            'dynamic_range': None,
+        })
+        self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), {
+            'vcodec': 'unknownvcodec',
+            'acodec': 'unknownacodec',
+        })
+        self.assertEqual(parse_codecs('unknown'), {})
  
      def test_escape_rfc3986(self):
          reserved = "!*'();:@&=+$,/?#[]"
@@ -757,6 +984,9 @@ def test_js_to_json_realworld(self):
          inp = '''{"duration": "00:01:07"}'''
          self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''')
  
+        inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}'''
+        self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''')
+
      def test_js_to_json_edgecases(self):
          on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
          self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
@@ -774,15 +1004,52 @@ def test_js_to_json_edgecases(self):
          self.assertEqual(d['x'], 1)
          self.assertEqual(d['y'], 'a')
  
+        # Just drop ! prefix for now though this results in a wrong value
+        on = js_to_json('''{
+            a: !0,
+            b: !1,
+            c: !!0,
+            d: !!42.42,
+            e: !!![],
+            f: !"abc",
+            g: !"",
+            !42: 42
+        }''')
+        self.assertEqual(json.loads(on), {
+            'a': 0,
+            'b': 1,
+            'c': 0,
+            'd': 42.42,
+            'e': [],
+            'f': "abc",
+            'g': "",
+            '42': 42
+        })
+
          on = js_to_json('["abc", "def",]')
          self.assertEqual(json.loads(on), ['abc', 'def'])
  
+        on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]')
+        self.assertEqual(json.loads(on), ['abc', 'def'])
+
+        on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]')
+        self.assertEqual(json.loads(on), ['abc', 'def'])
+
          on = js_to_json('{"abc": "def",}')
          self.assertEqual(json.loads(on), {'abc': 'def'})
  
+        on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}')
+        self.assertEqual(json.loads(on), {'abc': 'def'})
+
          on = js_to_json('{ 0: /* " \n */ ",]" , }')
          self.assertEqual(json.loads(on), {'0': ',]'})
  
+        on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }')
+        self.assertEqual(json.loads(on), {'0': ',]'})
+
+        on = js_to_json('{ 0: // comment\n1 }')
+        self.assertEqual(json.loads(on), {'0': 1})
+
          on = js_to_json(r'["<p>x<\/p>"]')
          self.assertEqual(json.loads(on), ['<p>x</p>'])
  
@@ -792,15 +1059,43 @@ def test_js_to_json_edgecases(self):
          on = js_to_json("['a\\\nb']")
          self.assertEqual(json.loads(on), ['ab'])
  
+        on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/")
+        self.assertEqual(json.loads(on), ['ab'])
+
          on = js_to_json('{0xff:0xff}')
          self.assertEqual(json.loads(on), {'255': 255})
  
+        on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}')
+        self.assertEqual(json.loads(on), {'255': 255})
+
          on = js_to_json('{077:077}')
          self.assertEqual(json.loads(on), {'63': 63})
  
+        on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}')
+        self.assertEqual(json.loads(on), {'63': 63})
+
          on = js_to_json('{42:42}')
          self.assertEqual(json.loads(on), {'42': 42})
  
+        on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}')
+        self.assertEqual(json.loads(on), {'42': 42})
+
+        on = js_to_json('{42:4.2e1}')
+        self.assertEqual(json.loads(on), {'42': 42.0})
+
+        on = js_to_json('{ "0x40": "0x40" }')
+        self.assertEqual(json.loads(on), {'0x40': '0x40'})
+
+        on = js_to_json('{ "040": "040" }')
+        self.assertEqual(json.loads(on), {'040': '040'})
+
+        on = js_to_json('[1,//{},\n2]')
+        self.assertEqual(json.loads(on), [1, 2])
+
+    def test_js_to_json_malformed(self):
+        self.assertEqual(js_to_json('42a1'), '42"a1"')
+        self.assertEqual(js_to_json('42a-1'), '42"a"-1')
+
      def test_extract_attributes(self):
          self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
          self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
@@ -838,10 +1133,13 @@ def test_extract_attributes(self):
              supports_outside_bmp = False
          if supports_outside_bmp:
              self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
+        # Malformed HTML should not break attributes extraction on older Python
+        self.assertEqual(extract_attributes('<mal"formed/>'), {})
  
      def test_clean_html(self):
          self.assertEqual(clean_html('a:\nb'), 'a: b')
-        self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"')
+        self.assertEqual(clean_html('a:\n   "b"'), 'a: "b"')
+        self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
  
      def test_intlist_to_bytes(self):
          self.assertEqual(
@@ -851,7 +1149,7 @@ def test_intlist_to_bytes(self):
      def test_args_to_str(self):
          self.assertEqual(
              args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
-            'foo ba/r -baz \'2 be\' \'\''
+            'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""'
          )
  
      def test_parse_filesize(self):
@@ -874,9 +1172,36 @@ def test_parse_count(self):
          self.assertEqual(parse_count('1000'), 1000)
          self.assertEqual(parse_count('1.000'), 1000)
          self.assertEqual(parse_count('1.1k'), 1100)
+        self.assertEqual(parse_count('1.1 k'), 1100)
+        self.assertEqual(parse_count('1,1 k'), 1100)
          self.assertEqual(parse_count('1.1kk'), 1100000)
          self.assertEqual(parse_count('1.1kk '), 1100000)
+        self.assertEqual(parse_count('1,1kk'), 1100000)
+        self.assertEqual(parse_count('100 views'), 100)
+        self.assertEqual(parse_count('1,100 views'), 1100)
          self.assertEqual(parse_count('1.1kk views'), 1100000)
+        self.assertEqual(parse_count('10M views'), 10000000)
+        self.assertEqual(parse_count('has 10M views'), 10000000)
+
+    def test_parse_resolution(self):
+        self.assertEqual(parse_resolution(None), {})
+        self.assertEqual(parse_resolution(''), {})
+        self.assertEqual(parse_resolution(' 1920x1080'), {'width': 1920, 'height': 1080})
+        self.assertEqual(parse_resolution('1920×1080 '), {'width': 1920, 'height': 1080})
+        self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080})
+        self.assertEqual(parse_resolution('720p'), {'height': 720})
+        self.assertEqual(parse_resolution('4k'), {'height': 2160})
+        self.assertEqual(parse_resolution('8K'), {'height': 4320})
+        self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080})
+        self.assertEqual(parse_resolution('ep1x2'), {})
+        self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080})
+
+    def test_parse_bitrate(self):
+        self.assertEqual(parse_bitrate(None), None)
+        self.assertEqual(parse_bitrate(''), None)
+        self.assertEqual(parse_bitrate('300kbps'), 300)
+        self.assertEqual(parse_bitrate('1500kbps'), 1500)
+        self.assertEqual(parse_bitrate('300 kbps'), 300)
  
      def test_version_tuple(self):
          self.assertEqual(version_tuple('1'), (1,))
@@ -918,21 +1243,72 @@ def test_is_html(self):
              b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
  
      def test_render_table(self):
+        self.assertEqual(
+            render_table(
+                ['a', 'empty', 'bcd'],
+                [[123, '', 4], [9999, '', 51]]),
+            'a    empty bcd\n'
+            '123        4\n'
+            '9999       51')
+
+        self.assertEqual(
+            render_table(
+                ['a', 'empty', 'bcd'],
+                [[123, '', 4], [9999, '', 51]],
+                hide_empty=True),
+            'a    bcd\n'
+            '123  4\n'
+            '9999 51')
+
+        self.assertEqual(
+            render_table(
+                ['\ta', 'bcd'],
+                [['1\t23', 4], ['\t9999', 51]]),
+            '   a bcd\n'
+            '1 23 4\n'
+            '9999 51')
+
          self.assertEqual(
              render_table(
                  ['a', 'bcd'],
-                [[123, 4], [9999, 51]]),
+                [[123, 4], [9999, 51]],
+                delim='-'),
              'a    bcd\n'
+            '--------\n'
              '123  4\n'
              '9999 51')
  
+        self.assertEqual(
+            render_table(
+                ['a', 'bcd'],
+                [[123, 4], [9999, 51]],
+                delim='-', extra_gap=2),
+            'a      bcd\n'
+            '----------\n'
+            '123    4\n'
+            '9999   51')
+
      def test_match_str(self):
-        self.assertRaises(ValueError, match_str, 'xy>foobar', {})
+        # Unary
          self.assertFalse(match_str('xy', {'x': 1200}))
          self.assertTrue(match_str('!xy', {'x': 1200}))
          self.assertTrue(match_str('x', {'x': 1200}))
          self.assertFalse(match_str('!x', {'x': 1200}))
          self.assertTrue(match_str('x', {'x': 0}))
+        self.assertTrue(match_str('is_live', {'is_live': True}))
+        self.assertFalse(match_str('is_live', {'is_live': False}))
+        self.assertFalse(match_str('is_live', {'is_live': None}))
+        self.assertFalse(match_str('is_live', {}))
+        self.assertFalse(match_str('!is_live', {'is_live': True}))
+        self.assertTrue(match_str('!is_live', {'is_live': False}))
+        self.assertTrue(match_str('!is_live', {'is_live': None}))
+        self.assertTrue(match_str('!is_live', {}))
+        self.assertTrue(match_str('title', {'title': 'abc'}))
+        self.assertTrue(match_str('title', {'title': ''}))
+        self.assertFalse(match_str('!title', {'title': 'abc'}))
+        self.assertFalse(match_str('!title', {'title': ''}))
+
+        # Numeric
          self.assertFalse(match_str('x>0', {'x': 0}))
          self.assertFalse(match_str('x>0', {}))
          self.assertTrue(match_str('x>?0', {}))
@@ -940,10 +1316,26 @@ def test_match_str(self):
          self.assertFalse(match_str('x>2K', {'x': 1200}))
          self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
          self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
+        self.assertTrue(match_str('x > 1:0:0', {'x': 3700}))
+
+        # String
          self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
          self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
          self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
          self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
+        self.assertTrue(match_str('y^=foo', {'y': 'foobar42'}))
+        self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'}))
+        self.assertFalse(match_str('y^=bar', {'y': 'foobar42'}))
+        self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'}))
+        self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42})
+        self.assertTrue(match_str('y*=bar', {'y': 'foobar42'}))
+        self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'}))
+        self.assertFalse(match_str('y*=baz', {'y': 'foobar42'}))
+        self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'}))
+        self.assertTrue(match_str('y$=42', {'y': 'foobar42'}))
+        self.assertFalse(match_str('y$=43', {'y': 'foobar42'}))
+
+        # And
          self.assertFalse(match_str(
              'like_count > 100 & dislike_count <? 50 & description',
              {'like_count': 90, 'description': 'foo'}))
@@ -957,6 +1349,35 @@ def test_match_str(self):
              'like_count > 100 & dislike_count <? 50 & description',
              {'like_count': 190, 'dislike_count': 10}))
  
+        # Regex
+        self.assertTrue(match_str(r'x~=\bbar', {'x': 'foo bar'}))
+        self.assertFalse(match_str(r'x~=\bbar.+', {'x': 'foo bar'}))
+        self.assertFalse(match_str(r'x~=^FOO', {'x': 'foo bar'}))
+        self.assertTrue(match_str(r'x~=(?i)^FOO', {'x': 'foo bar'}))
+
+        # Quotes
+        self.assertTrue(match_str(r'x^="foo"', {'x': 'foo "bar"'}))
+        self.assertFalse(match_str(r'x^="foo  "', {'x': 'foo "bar"'}))
+        self.assertFalse(match_str(r'x$="bar"', {'x': 'foo "bar"'}))
+        self.assertTrue(match_str(r'x$=" \"bar\""', {'x': 'foo "bar"'}))
+
+        # Escaping &
+        self.assertFalse(match_str(r'x=foo & bar', {'x': 'foo & bar'}))
+        self.assertTrue(match_str(r'x=foo \& bar', {'x': 'foo & bar'}))
+        self.assertTrue(match_str(r'x=foo \& bar & x^=foo', {'x': 'foo & bar'}))
+        self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))
+
+        # Example from docs
+        self.assertTrue(match_str(
+            r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'",
+            {'description': 'Raining Cats & Dogs'}))
+
+        # Incomplete
+        self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True))
+        self.assertTrue(match_str('x', {'id': 'foo'}, True))
+        self.assertTrue(match_str('!x', {'id': 'foo'}, True))
+        self.assertFalse(match_str('x', {'id': 'foo'}, False))
+
      def test_parse_dfxp_time_expr(self):
          self.assertEqual(parse_dfxp_time_expr(None), None)
          self.assertEqual(parse_dfxp_time_expr(''), None)
@@ -979,7 +1400,7 @@ def test_dfxp2srt(self):
                      <p begin="3" dur="-1">Ignored, three</p>
                  </div>
              </body>
-            </tt>'''
+            </tt>'''.encode('utf-8')
          srt_data = '''1
  00:00:00,000 --> 00:00:01,000
  The following line contains Chinese characters and special symbols
@@ -1004,7 +1425,7 @@ def test_dfxp2srt(self):
                      <p begin="0" end="1">The first line</p>
                  </div>
              </body>
-            </tt>'''
+            </tt>'''.encode('utf-8')
          srt_data = '''1
  00:00:00,000 --> 00:00:01,000
  The first line
@@ -1012,6 +1433,67 @@ def test_dfxp2srt(self):
  '''
          self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
  
+        dfxp_data_with_style = '''<?xml version="1.0" encoding="utf-8"?>
+<tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata">
+  <head>
+    <styling>
+      <style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" />
+      <style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" />
+      <style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" />
+      <style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" />
+    </styling>
+  </head>
+  <body tts:textAlign="center" style="s0">
+    <div>
+      <p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p>
+      <p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p>
+      <p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p>
+      <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
+    </div>
+  </body>
+</tt>'''.encode('utf-8')
+        srt_data = '''1
+00:00:02,080 --> 00:00:05,840
+<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
+
+2
+00:00:02,080 --> 00:00:05,840
+<b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1
+</font>part 2</font></b>
+
+3
+00:00:05,840 --> 00:00:09,560
+<u><font color="lime">line 3
+part 3</font></u>
+
+4
+00:00:09,560 --> 00:00:12,360
+<i><u><font color="yellow"><font color="lime">inner
+ </font>style</font></u></i>
+
+'''
+        self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
+
+        dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
+            <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+            <body>
+                <div xml:lang="en">
+                    <p begin="0" end="1">Line 1</p>
+                    <p begin="1" end="2">第二行</p>
+                </div>
+            </body>
+            </tt>'''.encode('utf-16')
+        srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+Line 1
+
+2
+00:00:01,000 --> 00:00:02,000
+第二行
+
+'''
+        self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)
+
      def test_cli_option(self):
          self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
          self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
@@ -1057,6 +1539,10 @@ def test_cli_bool_option(self):
              cli_bool_option(
                  {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
              ['--check-certificate=true'])
+        self.assertEqual(
+            cli_bool_option(
+                {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+            [])
  
      def test_ohdave_rsa_encrypt(self):
          N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
@@ -1066,6 +1552,14 @@ def test_ohdave_rsa_encrypt(self):
              ohdave_rsa_encrypt(b'aa111222', e, N),
              '726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')
  
+    def test_pkcs1pad(self):
+        data = [1, 2, 3]
+        padded_data = pkcs1pad(data, 32)
+        self.assertEqual(padded_data[:2], [0, 2])
+        self.assertEqual(padded_data[28:], [0, 1, 2, 3])
+
+        self.assertRaises(ValueError, pkcs1pad, data, 8)
+
      def test_encode_base_n(self):
          self.assertEqual(encode_base_n(0, 30), '0')
          self.assertEqual(encode_base_n(80, 30), '2k')
@@ -1077,18 +1571,261 @@ def test_encode_base_n(self):
          self.assertRaises(ValueError, encode_base_n, 0, 70)
          self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
  
+    def test_caesar(self):
+        self.assertEqual(caesar('ace', 'abcdef', 2), 'cea')
+        self.assertEqual(caesar('cea', 'abcdef', -2), 'ace')
+        self.assertEqual(caesar('ace', 'abcdef', -2), 'eac')
+        self.assertEqual(caesar('eac', 'abcdef', 2), 'ace')
+        self.assertEqual(caesar('ace', 'abcdef', 0), 'ace')
+        self.assertEqual(caesar('xyz', 'abcdef', 2), 'xyz')
+        self.assertEqual(caesar('abc', 'acegik', 2), 'ebg')
+        self.assertEqual(caesar('ebg', 'acegik', -2), 'abc')
+
+    def test_rot47(self):
+        self.assertEqual(rot47('yt-dlp'), r'JE\5=A')
+        self.assertEqual(rot47('YT-DLP'), r'*%\s{!')
+
      def test_urshift(self):
          self.assertEqual(urshift(3, 1), 1)
          self.assertEqual(urshift(-3, 1), 2147483646)
  
+    GET_ELEMENT_BY_CLASS_TEST_STRING = '''
+        <span class="foo bar">nice</span>
+    '''
+
      def test_get_element_by_class(self):
-        html = '''
-            <span class="foo bar">nice</span>
-        '''
+        html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
  
          self.assertEqual(get_element_by_class('foo', html), 'nice')
          self.assertEqual(get_element_by_class('no-such-class', html), None)
  
+    def test_get_element_html_by_class(self):
+        html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
+
+        self.assertEqual(get_element_html_by_class('foo', html), html.strip())
+        self.assertEqual(get_element_by_class('no-such-class', html), None)
+
+    GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING = '''
+        <div itemprop="author" itemscope>foo</div>
+    '''
+
+    def test_get_element_by_attribute(self):
+        html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
+
+        self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
+        self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
+        self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
+
+        html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING
+
+        self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')
+
+    def test_get_element_html_by_attribute(self):
+        html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
+
+        self.assertEqual(get_element_html_by_attribute('class', 'foo bar', html), html.strip())
+        self.assertEqual(get_element_html_by_attribute('class', 'foo', html), None)
+        self.assertEqual(get_element_html_by_attribute('class', 'no-such-foo', html), None)
+
+        html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING
+
+        self.assertEqual(get_element_html_by_attribute('itemprop', 'author', html), html.strip())
+
+    GET_ELEMENTS_BY_CLASS_TEST_STRING = '''
+        <span class="foo bar">nice</span><span class="foo bar">also nice</span>
+    '''
+    GET_ELEMENTS_BY_CLASS_RES = ['<span class="foo bar">nice</span>', '<span class="foo bar">also nice</span>']
+
+    def test_get_elements_by_class(self):
+        html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
+
+        self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
+        self.assertEqual(get_elements_by_class('no-such-class', html), [])
+
+    def test_get_elements_html_by_class(self):
+        html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
+
+        self.assertEqual(get_elements_html_by_class('foo', html), self.GET_ELEMENTS_BY_CLASS_RES)
+        self.assertEqual(get_elements_html_by_class('no-such-class', html), [])
+
+    def test_get_elements_by_attribute(self):
+        html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
+
+        self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
+        self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
+        self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
+
+    def test_get_elements_html_by_attribute(self):
+        html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
+
+        self.assertEqual(get_elements_html_by_attribute('class', 'foo bar', html), self.GET_ELEMENTS_BY_CLASS_RES)
+        self.assertEqual(get_elements_html_by_attribute('class', 'foo', html), [])
+        self.assertEqual(get_elements_html_by_attribute('class', 'no-such-foo', html), [])
+
+    def test_get_elements_text_and_html_by_attribute(self):
+        html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
+
+        self.assertEqual(
+            list(get_elements_text_and_html_by_attribute('class', 'foo bar', html)),
+            list(zip(['nice', 'also nice'], self.GET_ELEMENTS_BY_CLASS_RES)))
+        self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'foo', html)), [])
+        self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'no-such-foo', html)), [])
+
+    GET_ELEMENT_BY_TAG_TEST_STRING = '''
+    random text lorem ipsum</p>
+    <div>
+        this should be returned
+        <span>this should also be returned</span>
+        <div>
+            this should also be returned
+        </div>
+        closing tag above should not trick, so this should also be returned
+    </div>
+    but this text should not be returned
+    '''
+    GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[32:276]
+    GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT = GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML[5:-6]
+    GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119]
+    GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7]
+
+    def test_get_element_text_and_html_by_tag(self):
+        html = self.GET_ELEMENT_BY_TAG_TEST_STRING
+
+        self.assertEqual(
+            get_element_text_and_html_by_tag('div', html),
+            (self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT, self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML))
+        self.assertEqual(
+            get_element_text_and_html_by_tag('span', html),
+            (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML))
+        self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html)
+
+    def test_iri_to_uri(self):
+        self.assertEqual(
+            iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'),
+            'https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b')  # Same
+        self.assertEqual(
+            iri_to_uri('https://www.google.com/search?q=Käsesoßenrührlöffel'),  # German for cheese sauce stirring spoon
+            'https://www.google.com/search?q=K%C3%A4seso%C3%9Fenr%C3%BChrl%C3%B6ffel')
+        self.assertEqual(
+            iri_to_uri('https://www.google.com/search?q=lt<+gt>+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#'),
+            'https://www.google.com/search?q=lt%3C+gt%3E+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#')
+        self.assertEqual(
+            iri_to_uri('http://правозащита38.рф/category/news/'),
+            'http://xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/')
+        self.assertEqual(
+            iri_to_uri('http://www.правозащита38.рф/category/news/'),
+            'http://www.xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/')
+        self.assertEqual(
+            iri_to_uri('https://i❤.ws/emojidomain/👍👏🤝💪'),
+            'https://xn--i-7iq.ws/emojidomain/%F0%9F%91%8D%F0%9F%91%8F%F0%9F%A4%9D%F0%9F%92%AA')
+        self.assertEqual(
+            iri_to_uri('http://日本語.jp/'),
+            'http://xn--wgv71a119e.jp/')
+        self.assertEqual(
+            iri_to_uri('http://导航.中国/'),
+            'http://xn--fet810g.xn--fiqs8s/')
+
+    def test_clean_podcast_url(self):
+        self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
+        self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
+
+    def test_LazyList(self):
+        it = list(range(10))
+
+        self.assertEqual(list(LazyList(it)), it)
+        self.assertEqual(LazyList(it).exhaust(), it)
+        self.assertEqual(LazyList(it)[5], it[5])
+
+        self.assertEqual(LazyList(it)[5:], it[5:])
+        self.assertEqual(LazyList(it)[:5], it[:5])
+        self.assertEqual(LazyList(it)[::2], it[::2])
+        self.assertEqual(LazyList(it)[1::2], it[1::2])
+        self.assertEqual(LazyList(it)[5::-1], it[5::-1])
+        self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2])
+        self.assertEqual(LazyList(it)[::-1], it[::-1])
+
+        self.assertTrue(LazyList(it))
+        self.assertFalse(LazyList(range(0)))
+        self.assertEqual(len(LazyList(it)), len(it))
+        self.assertEqual(repr(LazyList(it)), repr(it))
+        self.assertEqual(str(LazyList(it)), str(it))
+
+        self.assertEqual(list(LazyList(it, reverse=True)), it[::-1])
+        self.assertEqual(list(reversed(LazyList(it))[::-1]), it)
+        self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7])
+
+    def test_LazyList_laziness(self):
+
+        def test(ll, idx, val, cache):
+            self.assertEqual(ll[idx], val)
+            self.assertEqual(getattr(ll, '_LazyList__cache'), list(cache))
+
+        ll = LazyList(range(10))
+        test(ll, 0, 0, range(1))
+        test(ll, 5, 5, range(6))
+        test(ll, -3, 7, range(10))
+
+        ll = LazyList(range(10), reverse=True)
+        test(ll, -1, 0, range(1))
+        test(ll, 3, 6, range(10))
+
+        ll = LazyList(itertools.count())
+        test(ll, 10, 10, range(11))
+        ll = reversed(ll)
+        test(ll, -15, 14, range(15))
+
+    def test_format_bytes(self):
+        self.assertEqual(format_bytes(0), '0.00B')
+        self.assertEqual(format_bytes(1000), '1000.00B')
+        self.assertEqual(format_bytes(1024), '1.00KiB')
+        self.assertEqual(format_bytes(1024**2), '1.00MiB')
+        self.assertEqual(format_bytes(1024**3), '1.00GiB')
+        self.assertEqual(format_bytes(1024**4), '1.00TiB')
+        self.assertEqual(format_bytes(1024**5), '1.00PiB')
+        self.assertEqual(format_bytes(1024**6), '1.00EiB')
+        self.assertEqual(format_bytes(1024**7), '1.00ZiB')
+        self.assertEqual(format_bytes(1024**8), '1.00YiB')
+        self.assertEqual(format_bytes(1024**9), '1024.00YiB')
+
+    def test_hide_login_info(self):
+        self.assertEqual(Config.hide_login_info(['-u', 'foo', '-p', 'bar']),
+                         ['-u', 'PRIVATE', '-p', 'PRIVATE'])
+        self.assertEqual(Config.hide_login_info(['-u']), ['-u'])
+        self.assertEqual(Config.hide_login_info(['-u', 'foo', '-u', 'bar']),
+                         ['-u', 'PRIVATE', '-u', 'PRIVATE'])
+        self.assertEqual(Config.hide_login_info(['--username=foo']),
+                         ['--username=PRIVATE'])
+
+    def test_locked_file(self):
+        TEXT = 'test_locked_file\n'
+        FILE = 'test_locked_file.ytdl'
+        MODES = 'war'  # Order is important
+
+        try:
+            for lock_mode in MODES:
+                with locked_file(FILE, lock_mode, False) as f:
+                    if lock_mode == 'r':
+                        self.assertEqual(f.read(), TEXT * 2, 'Wrong file content')
+                    else:
+                        f.write(TEXT)
+                    for test_mode in MODES:
+                        testing_write = test_mode != 'r'
+                        try:
+                            with locked_file(FILE, test_mode, False):
+                                pass
+                        except (BlockingIOError, PermissionError):
+                            if not testing_write:  # FIXME
+                                print(f'Known issue: Exclusive lock ({lock_mode}) blocks read access ({test_mode})')
+                                continue
+                            self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}')
+                        else:
+                            self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}')
+        finally:
+            try:
+                os.remove(FILE)
+            except Exception:
+                pass
+
  
  if __name__ == '__main__':
      unittest.main()