-#!/usr/bin/env python
-# coding: utf-8
-
-from __future__ import unicode_literals
+#!/usr/bin/env python3
# Allow direct execution
import os
import sys
import unittest
+import warnings
+import datetime as dt
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-# Various small unit tests
+import contextlib
import io
+import itertools
import json
+import subprocess
import xml.etree.ElementTree
-from youtube_dlc.utils import (
+from yt_dlp.compat import (
+ compat_etree_fromstring,
+ compat_HTMLParseError,
+ compat_os_name,
+)
+from yt_dlp.utils import (
+ Config,
+ DateRange,
+ ExtractorError,
+ InAdvancePagedList,
+ LazyList,
+ NO_DEFAULT,
+ OnDemandPagedList,
+ Popen,
age_restricted,
args_to_str,
- encode_base_n,
+ base_url,
caesar,
clean_html,
+ clean_podcast_url,
+ cli_bool_option,
+ cli_option,
+ cli_valueless_option,
date_from_str,
- DateRange,
+ datetime_from_str,
detect_exe_version,
determine_ext,
- dict_get,
+ determine_file_encoding,
+ dfxp2srt,
+ encode_base_n,
encode_compat_str,
encodeFilename,
- escape_rfc3986,
- escape_url,
+ expand_path,
extract_attributes,
- ExtractorError,
+ extract_basic_auth,
find_xpath_attr,
fix_xml_ampersands,
float_or_none,
- get_element_by_class,
+ format_bytes,
+ get_compatible_ext,
get_element_by_attribute,
- get_elements_by_class,
+ get_element_by_class,
+ get_element_html_by_attribute,
+ get_element_html_by_class,
+ get_element_text_and_html_by_tag,
get_elements_by_attribute,
- InAdvancePagedList,
+ get_elements_by_class,
+ get_elements_html_by_attribute,
+ get_elements_html_by_class,
+ get_elements_text_and_html_by_attribute,
int_or_none,
intlist_to_bytes,
+ iri_to_uri,
is_html,
js_to_json,
limit_length,
+ locked_file,
+ lowercase_escape,
+ match_str,
merge_dicts,
mimetype2ext,
month_by_name,
multipart_encode,
ohdave_rsa_encrypt,
- OnDemandPagedList,
orderedSet,
parse_age_limit,
+ parse_bitrate,
+ parse_codecs,
+ parse_count,
+ parse_dfxp_time_expr,
parse_duration,
parse_filesize,
- parse_count,
parse_iso8601,
+ parse_qs,
parse_resolution,
- parse_bitrate,
pkcs1pad,
- read_batch_urls,
- sanitize_filename,
- sanitize_path,
- sanitize_url,
- expand_path,
prepend_extension,
- replace_extension,
- remove_start,
+ read_batch_urls,
remove_end,
remove_quotes,
+ remove_start,
+ render_table,
+ replace_extension,
rot47,
+ sanitize_filename,
+ sanitize_path,
+ sanitize_url,
shell_quote,
smuggle_url,
str_to_int,
strip_or_none,
subtitles_filename,
timeconvert,
+ try_call,
unescapeHTML,
unified_strdate,
unified_timestamp,
unsmuggle_url,
+ update_url_query,
uppercase_escape,
- lowercase_escape,
url_basename,
url_or_none,
- base_url,
- urljoin,
urlencode_postdata,
+ urljoin,
urshift,
- update_url_query,
+ variadic,
version_tuple,
- xpath_with_ns,
+ xpath_attr,
xpath_element,
xpath_text,
- xpath_attr,
- render_table,
- match_str,
- parse_dfxp_time_expr,
- dfxp2srt,
- cli_option,
- cli_valueless_option,
- cli_bool_option,
- parse_codecs,
+ xpath_with_ns,
)
-from youtube_dlc.compat import (
- compat_chr,
- compat_etree_fromstring,
- compat_getenv,
- compat_os_name,
- compat_setenv,
- compat_urlparse,
- compat_parse_qs,
+from yt_dlp.utils.networking import (
+ HTTPHeaderDict,
+ escape_rfc3986,
+ normalize_url,
+ remove_dot_segments,
)
self.assertTrue(timeconvert('bougrg') is None)
def test_sanitize_filename(self):
+ self.assertEqual(sanitize_filename(''), '')
self.assertEqual(sanitize_filename('abc'), 'abc')
self.assertEqual(sanitize_filename('abc_d-e'), 'abc_d-e')
self.assertEqual(sanitize_filename('123'), '123')
- self.assertEqual('abc_de', sanitize_filename('abc/de'))
+ self.assertEqual('abc⧸de', sanitize_filename('abc/de'))
self.assertFalse('/' in sanitize_filename('abc/de///'))
- self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de'))
- self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|'))
- self.assertEqual('yes no', sanitize_filename('yes? no'))
- self.assertEqual('this - that', sanitize_filename('this: that'))
+ self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', is_id=False))
+ self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', is_id=False))
+ self.assertEqual('yes no', sanitize_filename('yes? no', is_id=False))
+ self.assertEqual('this - that', sanitize_filename('this: that', is_id=False))
self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
aumlaut = 'ä'
sanitize_filename('New World record at 0:12:34'),
'New World record at 0_12_34')
- self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
+ self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf')
self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
- self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf')
+ self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf')
+ self.assertEqual(sanitize_filename('.gasdgf'), '.gasdgf')
self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
+ self.assertEqual(sanitize_filename('.gasdgf', is_id=False), 'gasdgf')
forbidden = '"\0\\/'
for fc in forbidden:
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
+ self.assertEqual(sanitize_url('foo bar'), 'foo bar')
def test_expand_path(self):
def env(var):
- return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
+ return f'%{var}%' if sys.platform == 'win32' else f'${var}'
- compat_setenv('youtube_dlc_EXPATH_PATH', 'expanded')
- self.assertEqual(expand_path(env('youtube_dlc_EXPATH_PATH')), 'expanded')
- self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
- self.assertEqual(expand_path('~'), compat_getenv('HOME'))
- self.assertEqual(
- expand_path('~/%s' % env('youtube_dlc_EXPATH_PATH')),
- '%s/expanded' % compat_getenv('HOME'))
+ os.environ['yt_dlp_EXPATH_PATH'] = 'expanded'
+ self.assertEqual(expand_path(env('yt_dlp_EXPATH_PATH')), 'expanded')
+
+ old_home = os.environ.get('HOME')
+ test_str = R'C:\Documents and Settings\тест\Application Data'
+ try:
+ os.environ['HOME'] = test_str
+ self.assertEqual(expand_path(env('HOME')), os.getenv('HOME'))
+ self.assertEqual(expand_path('~'), os.getenv('HOME'))
+ self.assertEqual(
+ expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')),
+ '%s/expanded' % os.getenv('HOME'))
+ finally:
+ os.environ['HOME'] = old_home or ''
def test_prepend_extension(self):
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week'))
self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week'))
- self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year'))
- self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month'))
+ self.assertEqual(date_from_str('20200229+365day'), date_from_str('20200229+1year'))
+ self.assertEqual(date_from_str('20210131+28day'), date_from_str('20210131+1month'))
+
+ def test_datetime_from_str(self):
+ self.assertEqual(datetime_from_str('yesterday', precision='day'), datetime_from_str('now-1day', precision='auto'))
+ self.assertEqual(datetime_from_str('now+7day', precision='day'), datetime_from_str('now+1week', precision='auto'))
+ self.assertEqual(datetime_from_str('now+14day', precision='day'), datetime_from_str('now+2week', precision='auto'))
+ self.assertEqual(datetime_from_str('20200229+365day', precision='day'), datetime_from_str('20200229+1year', precision='auto'))
+ self.assertEqual(datetime_from_str('20210131+28day', precision='day'), datetime_from_str('20210131+1month', precision='auto'))
+ self.assertEqual(datetime_from_str('20210131+59day', precision='day'), datetime_from_str('20210131+2month', precision='auto'))
+ self.assertEqual(datetime_from_str('now+1day', precision='hour'), datetime_from_str('now+24hours', precision='auto'))
+ self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto'))
def test_daterange(self):
_20century = DateRange("19000101", "20000101")
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
self.assertEqual(unified_strdate('1968 12 10'), '19681210')
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
+ self.assertEqual(unified_strdate('31-07-2022 20:00'), '20220731')
self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
self.assertEqual(
unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
+ self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
+ self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
+ self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)
+
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456)
self.assertEqual(str_to_int(523), 523)
- # Python 3 has no long
- if sys.version_info < (3, 0):
- eval('self.assertEqual(str_to_int(123456L), 123456)')
self.assertEqual(str_to_int('noninteger'), None)
self.assertEqual(str_to_int([]), None)
self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
+ self.assertEqual(base_url('http://foo.de/bar/baz&x=z&w=y/x/c'), 'http://foo.de/bar/baz&x=z&w=y/x/')
def test_urljoin(self):
self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(url_or_none('http$://foo.de'), None)
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
self.assertEqual(url_or_none('//foo.de'), '//foo.de')
+ self.assertEqual(url_or_none('s3://foo.de'), None)
+ self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de')
+ self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
+ self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
+ self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
def test_parse_age_limit(self):
self.assertEqual(parse_age_limit(None), None)
self.assertEqual(parse_duration('3h 11m 53s'), 11513)
self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513)
self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513)
+ self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513)
+ self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513)
self.assertEqual(parse_duration('62m45s'), 3765)
self.assertEqual(parse_duration('6m59s'), 419)
self.assertEqual(parse_duration('49s'), 49)
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
+ self.assertEqual(parse_duration('01:02:03:050'), 3723.05)
+ self.assertEqual(parse_duration('103:050'), 103.05)
+ self.assertEqual(parse_duration('1HR 3MIN'), 3780)
+ self.assertEqual(parse_duration('2hrs 3mins'), 7380)
def test_fix_xml_ampersands(self):
self.assertEqual(
def get_page(pagenum):
firstid = pagenum * pagesize
upto = min(size, pagenum * pagesize + pagesize)
- for i in range(firstid, upto):
- yield i
+ yield from range(firstid, upto)
pl = OnDemandPagedList(get_page, pagesize)
got = pl.getslice(*sliceargs)
self.assertTrue(isinstance(data, bytes))
def test_update_url_query(self):
- def query_dict(url):
- return compat_parse_qs(compat_urlparse.urlparse(url).query)
- self.assertEqual(query_dict(update_url_query(
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
- query_dict('http://example.com/path?quality=HD&format=mp4'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?quality=HD&format=mp4'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
- query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?system=LINUX&system=WINDOWS'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'fields': 'id,formats,subtitles'})),
- query_dict('http://example.com/path?fields=id,formats,subtitles'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
- query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path?manifest=f4m', {'manifest': []})),
- query_dict('http://example.com/path'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
- query_dict('http://example.com/path?system=LINUX'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?system=LINUX'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'fields': b'id,formats,subtitles'})),
- query_dict('http://example.com/path?fields=id,formats,subtitles'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'width': 1080, 'height': 720})),
- query_dict('http://example.com/path?width=1080&height=720'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?width=1080&height=720'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'bitrate': 5020.43})),
- query_dict('http://example.com/path?bitrate=5020.43'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?bitrate=5020.43'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'test': '第二行тест'})),
- query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
+ parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
def test_multipart_encode(self):
self.assertEqual(
multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
self.assertEqual(
- multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0],
+ multipart_encode({'欄位'.encode(): '值'.encode()}, boundary='AAAAAA')[0],
b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
self.assertRaises(
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
- def test_dict_get(self):
- FALSE_VALUES = {
- 'none': None,
- 'false': False,
- 'zero': 0,
- 'empty_string': '',
- 'empty_list': [],
- }
- d = FALSE_VALUES.copy()
- d['a'] = 42
- self.assertEqual(dict_get(d, 'a'), 42)
- self.assertEqual(dict_get(d, 'b'), None)
- self.assertEqual(dict_get(d, 'b', 42), 42)
- self.assertEqual(dict_get(d, ('a', )), 42)
- self.assertEqual(dict_get(d, ('b', 'a', )), 42)
- self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
- self.assertEqual(dict_get(d, ('b', 'c', )), None)
- self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
- for key, false_value in FALSE_VALUES.items():
- self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
- self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
-
def test_merge_dicts(self):
self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
def test_parse_iso8601(self):
self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00'), 1395641066)
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=dt.timedelta(hours=-7)), 1395641066)
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=NO_DEFAULT), None)
+ # default does not override timezone in date_str
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00', timezone=dt.timedelta(hours=-10)), 1395641066)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266)
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
'vcodec': 'avc1.77.30',
'acodec': 'mp4a.40.2',
+ 'dynamic_range': None,
})
self.assertEqual(parse_codecs('mp4a.40.2'), {
'vcodec': 'none',
'acodec': 'mp4a.40.2',
+ 'dynamic_range': None,
})
self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), {
'vcodec': 'avc1.42001e',
'acodec': 'mp4a.40.5',
+ 'dynamic_range': None,
})
self.assertEqual(parse_codecs('avc3.640028'), {
'vcodec': 'avc3.640028',
'acodec': 'none',
+ 'dynamic_range': None,
})
self.assertEqual(parse_codecs(', h264,,newcodec,aac'), {
'vcodec': 'h264',
'acodec': 'aac',
+ 'dynamic_range': None,
})
self.assertEqual(parse_codecs('av01.0.05M.08'), {
'vcodec': 'av01.0.05M.08',
'acodec': 'none',
+ 'dynamic_range': None,
+ })
+ self.assertEqual(parse_codecs('vp9.2'), {
+ 'vcodec': 'vp9.2',
+ 'acodec': 'none',
+ 'dynamic_range': 'HDR10',
+ })
+ self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), {
+ 'vcodec': 'av01.0.12M.10.0.110.09.16.09.0',
+ 'acodec': 'none',
+ 'dynamic_range': 'HDR10',
+ })
+ self.assertEqual(parse_codecs('dvhe'), {
+ 'vcodec': 'dvhe',
+ 'acodec': 'none',
+ 'dynamic_range': 'DV',
})
self.assertEqual(parse_codecs('theora, vorbis'), {
'vcodec': 'theora',
'acodec': 'vorbis',
+ 'dynamic_range': None,
})
self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), {
'vcodec': 'unknownvcodec',
self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar')
self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar')
- def test_escape_url(self):
+ def test_normalize_url(self):
self.assertEqual(
- escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
+ normalize_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4'
)
self.assertEqual(
- escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
+ normalize_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290'
)
self.assertEqual(
- escape_url('http://тест.рф/фрагмент'),
+ normalize_url('http://тест.рф/фрагмент'),
'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
)
self.assertEqual(
- escape_url('http://тест.рф/абв?абв=абв#абв'),
+ normalize_url('http://тест.рф/абв?абв=абв#абв'),
'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
)
- self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
+ self.assertEqual(normalize_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
+
+ self.assertEqual(normalize_url('http://www.example.com/../a/b/../c/./d.html'), 'http://www.example.com/a/c/d.html')
+
+ def test_remove_dot_segments(self):
+ self.assertEqual(remove_dot_segments('/a/b/c/./../../g'), '/a/g')
+ self.assertEqual(remove_dot_segments('mid/content=5/../6'), 'mid/6')
+ self.assertEqual(remove_dot_segments('/ad/../cd'), '/cd')
+ self.assertEqual(remove_dot_segments('/ad/../cd/'), '/cd/')
+ self.assertEqual(remove_dot_segments('/..'), '/')
+ self.assertEqual(remove_dot_segments('/./'), '/')
+ self.assertEqual(remove_dot_segments('/./a'), '/a')
+ self.assertEqual(remove_dot_segments('/abc/./.././d/././e/.././f/./../../ghi'), '/ghi')
+ self.assertEqual(remove_dot_segments('/'), '/')
+ self.assertEqual(remove_dot_segments('/t'), '/t')
+ self.assertEqual(remove_dot_segments('t'), 't')
+ self.assertEqual(remove_dot_segments(''), '')
+ self.assertEqual(remove_dot_segments('/../a/b/c'), '/a/b/c')
+ self.assertEqual(remove_dot_segments('../a'), 'a')
+ self.assertEqual(remove_dot_segments('./a'), 'a')
+ self.assertEqual(remove_dot_segments('.'), '')
+ self.assertEqual(remove_dot_segments('////'), '////')
+
+ def test_js_to_json_vars_strings(self):
+ self.assertDictEqual(
+ json.loads(js_to_json(
+ '''{
+ 'null': a,
+ 'nullStr': b,
+ 'true': c,
+ 'trueStr': d,
+ 'false': e,
+ 'falseStr': f,
+ 'unresolvedVar': g,
+ }''',
+ {
+ 'a': 'null',
+ 'b': '"null"',
+ 'c': 'true',
+ 'd': '"true"',
+ 'e': 'false',
+ 'f': '"false"',
+ 'g': 'var',
+ }
+ )),
+ {
+ 'null': None,
+ 'nullStr': 'null',
+ 'true': True,
+ 'trueStr': 'true',
+ 'false': False,
+ 'falseStr': 'false',
+ 'unresolvedVar': 'var'
+ }
+ )
+
+ self.assertDictEqual(
+ json.loads(js_to_json(
+ '''{
+ 'int': a,
+ 'intStr': b,
+ 'float': c,
+ 'floatStr': d,
+ }''',
+ {
+ 'a': '123',
+ 'b': '"123"',
+ 'c': '1.23',
+ 'd': '"1.23"',
+ }
+ )),
+ {
+ 'int': 123,
+ 'intStr': '123',
+ 'float': 1.23,
+ 'floatStr': '1.23',
+ }
+ )
+
+ self.assertDictEqual(
+ json.loads(js_to_json(
+ '''{
+ 'object': a,
+ 'objectStr': b,
+ 'array': c,
+ 'arrayStr': d,
+ }''',
+ {
+ 'a': '{}',
+ 'b': '"{}"',
+ 'c': '[]',
+ 'd': '"[]"',
+ }
+ )),
+ {
+ 'object': {},
+ 'objectStr': '{}',
+ 'array': [],
+ 'arrayStr': '[]',
+ }
+ )
def test_js_to_json_realworld(self):
inp = '''{
on = js_to_json('{ "040": "040" }')
self.assertEqual(json.loads(on), {'040': '040'})
+ on = js_to_json('[1,//{},\n2]')
+ self.assertEqual(json.loads(on), [1, 2])
+
+ on = js_to_json(R'"\^\$\#"')
+ self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped')
+
+ on = js_to_json('\'"\\""\'')
+ self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')
+
+ on = js_to_json('[new Date("spam"), \'("eggs")\']')
+ self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should match a single string')
+
def test_js_to_json_malformed(self):
self.assertEqual(js_to_json('42a1'), '42"a1"')
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
+ def test_js_to_json_template_literal(self):
+ self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"')
+ self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"')
+ self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"')
+ self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
+ self.assertEqual(js_to_json('`${name}`', {}), '"name"')
+
+ def test_js_to_json_common_constructors(self):
+ self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5})
+ self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10])
+ self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5])
+ self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5})
+ self.assertEqual(json.loads(js_to_json('new Date("123")')), "123")
+ self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), "2023-10-19")
+
def test_extract_attributes(self):
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
self.assertEqual(extract_attributes('<e x="décomposé">'), {'x': 'décompose\u0301'})
# "Narrow" Python builds don't support unicode code points outside BMP.
try:
- compat_chr(0x10000)
+ chr(0x10000)
supports_outside_bmp = True
except ValueError:
supports_outside_bmp = False
def test_clean_html(self):
self.assertEqual(clean_html('a:\nb'), 'a: b')
- self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
+ self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
def test_intlist_to_bytes(self):
self.assertEqual(parse_count('1000'), 1000)
self.assertEqual(parse_count('1.000'), 1000)
self.assertEqual(parse_count('1.1k'), 1100)
+ self.assertEqual(parse_count('1.1 k'), 1100)
+ self.assertEqual(parse_count('1,1 k'), 1100)
self.assertEqual(parse_count('1.1kk'), 1100000)
self.assertEqual(parse_count('1.1kk '), 1100000)
+ self.assertEqual(parse_count('1,1kk'), 1100000)
+ self.assertEqual(parse_count('100 views'), 100)
+ self.assertEqual(parse_count('1,100 views'), 1100)
self.assertEqual(parse_count('1.1kk views'), 1100000)
+ self.assertEqual(parse_count('10M views'), 10000000)
+ self.assertEqual(parse_count('has 10M views'), 10000000)
def test_parse_resolution(self):
self.assertEqual(parse_resolution(None), {})
self.assertEqual(parse_resolution(''), {})
- self.assertEqual(parse_resolution('1920x1080'), {'width': 1920, 'height': 1080})
- self.assertEqual(parse_resolution('1920×1080'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution(' 1920x1080'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('1920×1080 '), {'width': 1920, 'height': 1080})
self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080})
self.assertEqual(parse_resolution('720p'), {'height': 720})
self.assertEqual(parse_resolution('4k'), {'height': 2160})
self.assertEqual(parse_resolution('8K'), {'height': 4320})
+ self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('ep1x2'), {})
+ self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080})
def test_parse_bitrate(self):
self.assertEqual(parse_bitrate(None), None)
b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
def test_render_table(self):
+ self.assertEqual(
+ render_table(
+ ['a', 'empty', 'bcd'],
+ [[123, '', 4], [9999, '', 51]]),
+ 'a empty bcd\n'
+ '123 4\n'
+ '9999 51')
+
+ self.assertEqual(
+ render_table(
+ ['a', 'empty', 'bcd'],
+ [[123, '', 4], [9999, '', 51]],
+ hide_empty=True),
+ 'a bcd\n'
+ '123 4\n'
+ '9999 51')
+
+ self.assertEqual(
+ render_table(
+ ['\ta', 'bcd'],
+ [['1\t23', 4], ['\t9999', 51]]),
+ ' a bcd\n'
+ '1 23 4\n'
+ '9999 51')
+
self.assertEqual(
render_table(
['a', 'bcd'],
- [[123, 4], [9999, 51]]),
+ [[123, 4], [9999, 51]],
+ delim='-'),
'a bcd\n'
+ '--------\n'
'123 4\n'
'9999 51')
+ self.assertEqual(
+ render_table(
+ ['a', 'bcd'],
+ [[123, 4], [9999, 51]],
+ delim='-', extra_gap=2),
+ 'a bcd\n'
+ '----------\n'
+ '123 4\n'
+ '9999 51')
+
def test_match_str(self):
- self.assertRaises(ValueError, match_str, 'xy>foobar', {})
+ # Unary
self.assertFalse(match_str('xy', {'x': 1200}))
self.assertTrue(match_str('!xy', {'x': 1200}))
self.assertTrue(match_str('x', {'x': 1200}))
self.assertFalse(match_str('!x', {'x': 1200}))
self.assertTrue(match_str('x', {'x': 0}))
+ self.assertTrue(match_str('is_live', {'is_live': True}))
+ self.assertFalse(match_str('is_live', {'is_live': False}))
+ self.assertFalse(match_str('is_live', {'is_live': None}))
+ self.assertFalse(match_str('is_live', {}))
+ self.assertFalse(match_str('!is_live', {'is_live': True}))
+ self.assertTrue(match_str('!is_live', {'is_live': False}))
+ self.assertTrue(match_str('!is_live', {'is_live': None}))
+ self.assertTrue(match_str('!is_live', {}))
+ self.assertTrue(match_str('title', {'title': 'abc'}))
+ self.assertTrue(match_str('title', {'title': ''}))
+ self.assertFalse(match_str('!title', {'title': 'abc'}))
+ self.assertFalse(match_str('!title', {'title': ''}))
+
+ # Numeric
self.assertFalse(match_str('x>0', {'x': 0}))
self.assertFalse(match_str('x>0', {}))
self.assertTrue(match_str('x>?0', {}))
self.assertFalse(match_str('x>2K', {'x': 1200}))
self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
+ self.assertTrue(match_str('x > 1:0:0', {'x': 3700}))
+
+ # String
self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y^=foo', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y^=bar', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'}))
+ self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42})
+ self.assertTrue(match_str('y*=bar', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y*=baz', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y$=42', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y$=43', {'y': 'foobar42'}))
+
+ # And
self.assertFalse(match_str(
'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 90, 'description': 'foo'}))
self.assertFalse(match_str(
'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 190, 'dislike_count': 10}))
- self.assertTrue(match_str('is_live', {'is_live': True}))
- self.assertFalse(match_str('is_live', {'is_live': False}))
- self.assertFalse(match_str('is_live', {'is_live': None}))
- self.assertFalse(match_str('is_live', {}))
- self.assertFalse(match_str('!is_live', {'is_live': True}))
- self.assertTrue(match_str('!is_live', {'is_live': False}))
- self.assertTrue(match_str('!is_live', {'is_live': None}))
- self.assertTrue(match_str('!is_live', {}))
- self.assertTrue(match_str('title', {'title': 'abc'}))
- self.assertTrue(match_str('title', {'title': ''}))
- self.assertFalse(match_str('!title', {'title': 'abc'}))
- self.assertFalse(match_str('!title', {'title': ''}))
+
+ # Regex
+ self.assertTrue(match_str(r'x~=\bbar', {'x': 'foo bar'}))
+ self.assertFalse(match_str(r'x~=\bbar.+', {'x': 'foo bar'}))
+ self.assertFalse(match_str(r'x~=^FOO', {'x': 'foo bar'}))
+ self.assertTrue(match_str(r'x~=(?i)^FOO', {'x': 'foo bar'}))
+
+ # Quotes
+ self.assertTrue(match_str(r'x^="foo"', {'x': 'foo "bar"'}))
+ self.assertFalse(match_str(r'x^="foo "', {'x': 'foo "bar"'}))
+ self.assertFalse(match_str(r'x$="bar"', {'x': 'foo "bar"'}))
+ self.assertTrue(match_str(r'x$=" \"bar\""', {'x': 'foo "bar"'}))
+
+ # Escaping &
+ self.assertFalse(match_str(r'x=foo & bar', {'x': 'foo & bar'}))
+ self.assertTrue(match_str(r'x=foo \& bar', {'x': 'foo & bar'}))
+ self.assertTrue(match_str(r'x=foo \& bar & x^=foo', {'x': 'foo & bar'}))
+ self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))
+
+ # Example from docs
+ self.assertTrue(match_str(
+ r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'",
+ {'description': 'Raining Cats & Dogs'}))
+
+ # Incomplete
+ self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True))
+ self.assertTrue(match_str('x', {'id': 'foo'}, True))
+ self.assertTrue(match_str('!x', {'id': 'foo'}, True))
+ self.assertFalse(match_str('x', {'id': 'foo'}, False))
def test_parse_dfxp_time_expr(self):
self.assertEqual(parse_dfxp_time_expr(None), None)
<p begin="3" dur="-1">Ignored, three</p>
</div>
</body>
- </tt>'''.encode('utf-8')
+ </tt>'''.encode()
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The following line contains Chinese characters and special symbols
'''
self.assertEqual(dfxp2srt(dfxp_data), srt_data)
- dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?>
+ dfxp_data_no_default_namespace = b'''<?xml version="1.0" encoding="UTF-8"?>
<tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
<body>
<div xml:lang="en">
<p begin="0" end="1">The first line</p>
</div>
</body>
- </tt>'''.encode('utf-8')
+ </tt>'''
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The first line
'''
self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
- dfxp_data_with_style = '''<?xml version="1.0" encoding="utf-8"?>
+ dfxp_data_with_style = b'''<?xml version="1.0" encoding="utf-8"?>
<tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata">
<head>
<styling>
<p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
</div>
</body>
-</tt>'''.encode('utf-8')
+</tt>'''
srt_data = '''1
-00:00:02,080 --> 00:00:05,839
+00:00:02,080 --> 00:00:05,840
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
2
-00:00:02,080 --> 00:00:05,839
+00:00:02,080 --> 00:00:05,840
<b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1
</font>part 2</font></b>
3
-00:00:05,839 --> 00:00:09,560
+00:00:05,840 --> 00:00:09,560
<u><font color="lime">line 3
part 3</font></u>
4
-00:00:09,560 --> 00:00:12,359
+00:00:09,560 --> 00:00:12,360
<i><u><font color="yellow"><font color="lime">inner
</font>style</font></u></i>
self.assertEqual(caesar('ebg', 'acegik', -2), 'abc')
def test_rot47(self):
- self.assertEqual(rot47('youtube-dlc'), r'J@FEF36\5=4')
- self.assertEqual(rot47('YOUTUBE-DLC'), r'*~&%&qt\s{r')
+ self.assertEqual(rot47('yt-dlp'), r'JE\5=A')
+ self.assertEqual(rot47('YT-DLP'), r'*%\s{!')
def test_urshift(self):
self.assertEqual(urshift(3, 1), 1)
self.assertEqual(urshift(-3, 1), 2147483646)
+ GET_ELEMENT_BY_CLASS_TEST_STRING = '''
+ <span class="foo bar">nice</span>
+ '''
+
def test_get_element_by_class(self):
- html = '''
- <span class="foo bar">nice</span>
- '''
+ html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
self.assertEqual(get_element_by_class('foo', html), 'nice')
self.assertEqual(get_element_by_class('no-such-class', html), None)
+ def test_get_element_html_by_class(self):
+ html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
+
+ self.assertEqual(get_element_html_by_class('foo', html), html.strip())
+ self.assertEqual(get_element_by_class('no-such-class', html), None)
+
+ GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING = '''
+ <div itemprop="author" itemscope>foo</div>
+ '''
+
def test_get_element_by_attribute(self):
- html = '''
- <span class="foo bar">nice</span>
- '''
+ html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
- html = '''
- <div itemprop="author" itemscope>foo</div>
- '''
+ html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING
self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')
+ def test_get_element_html_by_attribute(self):
+ html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
+
+ self.assertEqual(get_element_html_by_attribute('class', 'foo bar', html), html.strip())
+ self.assertEqual(get_element_html_by_attribute('class', 'foo', html), None)
+ self.assertEqual(get_element_html_by_attribute('class', 'no-such-foo', html), None)
+
+ html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING
+
+ self.assertEqual(get_element_html_by_attribute('itemprop', 'author', html), html.strip())
+
+ GET_ELEMENTS_BY_CLASS_TEST_STRING = '''
+ <span class="foo bar">nice</span><span class="foo bar">also nice</span>
+ '''
+ GET_ELEMENTS_BY_CLASS_RES = ['<span class="foo bar">nice</span>', '<span class="foo bar">also nice</span>']
+
def test_get_elements_by_class(self):
- html = '''
- <span class="foo bar">nice</span><span class="foo bar">also nice</span>
- '''
+ html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
self.assertEqual(get_elements_by_class('no-such-class', html), [])
+ def test_get_elements_html_by_class(self):
+ html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
+
+ self.assertEqual(get_elements_html_by_class('foo', html), self.GET_ELEMENTS_BY_CLASS_RES)
+ self.assertEqual(get_elements_html_by_class('no-such-class', html), [])
+
def test_get_elements_by_attribute(self):
- html = '''
- <span class="foo bar">nice</span><span class="foo bar">also nice</span>
- '''
+ html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
+ def test_get_elements_html_by_attribute(self):
+ html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
+
+ self.assertEqual(get_elements_html_by_attribute('class', 'foo bar', html), self.GET_ELEMENTS_BY_CLASS_RES)
+ self.assertEqual(get_elements_html_by_attribute('class', 'foo', html), [])
+ self.assertEqual(get_elements_html_by_attribute('class', 'no-such-foo', html), [])
+
+ def test_get_elements_text_and_html_by_attribute(self):
+ html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
+
+ self.assertEqual(
+ list(get_elements_text_and_html_by_attribute('class', 'foo bar', html)),
+ list(zip(['nice', 'also nice'], self.GET_ELEMENTS_BY_CLASS_RES)))
+ self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'foo', html)), [])
+ self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'no-such-foo', html)), [])
+
+ self.assertEqual(list(get_elements_text_and_html_by_attribute(
+ 'class', 'foo', '<a class="foo">nice</a><span class="foo">nice</span>', tag='a')), [('nice', '<a class="foo">nice</a>')])
+
+ GET_ELEMENT_BY_TAG_TEST_STRING = '''
+ random text lorem ipsum</p>
+ <div>
+ this should be returned
+ <span>this should also be returned</span>
+ <div>
+ this should also be returned
+ </div>
+ closing tag above should not trick, so this should also be returned
+ </div>
+ but this text should not be returned
+ '''
+ GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[32:276]
+ GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT = GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML[5:-6]
+ GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119]
+ GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7]
+
+ def test_get_element_text_and_html_by_tag(self):
+ html = self.GET_ELEMENT_BY_TAG_TEST_STRING
+
+ self.assertEqual(
+ get_element_text_and_html_by_tag('div', html),
+ (self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT, self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML))
+ self.assertEqual(
+ get_element_text_and_html_by_tag('span', html),
+ (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML))
+ self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html)
+
+ def test_iri_to_uri(self):
+ self.assertEqual(
+ iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'),
+ 'https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b') # Same
+ self.assertEqual(
+ iri_to_uri('https://www.google.com/search?q=Käsesoßenrührlöffel'), # German for cheese sauce stirring spoon
+ 'https://www.google.com/search?q=K%C3%A4seso%C3%9Fenr%C3%BChrl%C3%B6ffel')
+ self.assertEqual(
+ iri_to_uri('https://www.google.com/search?q=lt<+gt>+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#'),
+ 'https://www.google.com/search?q=lt%3C+gt%3E+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#')
+ self.assertEqual(
+ iri_to_uri('http://правозащита38.рф/category/news/'),
+ 'http://xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/')
+ self.assertEqual(
+ iri_to_uri('http://www.правозащита38.рф/category/news/'),
+ 'http://www.xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/')
+ self.assertEqual(
+ iri_to_uri('https://i❤.ws/emojidomain/👍👏🤝💪'),
+ 'https://xn--i-7iq.ws/emojidomain/%F0%9F%91%8D%F0%9F%91%8F%F0%9F%A4%9D%F0%9F%92%AA')
+ self.assertEqual(
+ iri_to_uri('http://日本語.jp/'),
+ 'http://xn--wgv71a119e.jp/')
+ self.assertEqual(
+ iri_to_uri('http://导航.中国/'),
+ 'http://xn--fet810g.xn--fiqs8s/')
+
+ def test_clean_podcast_url(self):
+ self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
+ self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
+ self.assertEqual(clean_podcast_url('https://pdst.fm/e/2.gum.fm/chtbl.com/track/chrt.fm/track/34D33/pscrb.fm/rss/p/traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661'), 'https://traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661')
+ self.assertEqual(clean_podcast_url('https://pdst.fm/e/https://mgln.ai/e/441/www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3'), 'https://www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3')
+
+ def test_LazyList(self):
+ it = list(range(10))
+
+ self.assertEqual(list(LazyList(it)), it)
+ self.assertEqual(LazyList(it).exhaust(), it)
+ self.assertEqual(LazyList(it)[5], it[5])
+
+ self.assertEqual(LazyList(it)[5:], it[5:])
+ self.assertEqual(LazyList(it)[:5], it[:5])
+ self.assertEqual(LazyList(it)[::2], it[::2])
+ self.assertEqual(LazyList(it)[1::2], it[1::2])
+ self.assertEqual(LazyList(it)[5::-1], it[5::-1])
+ self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2])
+ self.assertEqual(LazyList(it)[::-1], it[::-1])
+
+ self.assertTrue(LazyList(it))
+ self.assertFalse(LazyList(range(0)))
+ self.assertEqual(len(LazyList(it)), len(it))
+ self.assertEqual(repr(LazyList(it)), repr(it))
+ self.assertEqual(str(LazyList(it)), str(it))
+
+ self.assertEqual(list(LazyList(it, reverse=True)), it[::-1])
+ self.assertEqual(list(reversed(LazyList(it))[::-1]), it)
+ self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7])
+
+ def test_LazyList_laziness(self):
+
+ def test(ll, idx, val, cache):
+ self.assertEqual(ll[idx], val)
+ self.assertEqual(ll._cache, list(cache))
+
+ ll = LazyList(range(10))
+ test(ll, 0, 0, range(1))
+ test(ll, 5, 5, range(6))
+ test(ll, -3, 7, range(10))
+
+ ll = LazyList(range(10), reverse=True)
+ test(ll, -1, 0, range(1))
+ test(ll, 3, 6, range(10))
+
+ ll = LazyList(itertools.count())
+ test(ll, 10, 10, range(11))
+ ll = reversed(ll)
+ test(ll, -15, 14, range(15))
+
+ def test_format_bytes(self):
+ self.assertEqual(format_bytes(0), '0.00B')
+ self.assertEqual(format_bytes(1000), '1000.00B')
+ self.assertEqual(format_bytes(1024), '1.00KiB')
+ self.assertEqual(format_bytes(1024**2), '1.00MiB')
+ self.assertEqual(format_bytes(1024**3), '1.00GiB')
+ self.assertEqual(format_bytes(1024**4), '1.00TiB')
+ self.assertEqual(format_bytes(1024**5), '1.00PiB')
+ self.assertEqual(format_bytes(1024**6), '1.00EiB')
+ self.assertEqual(format_bytes(1024**7), '1.00ZiB')
+ self.assertEqual(format_bytes(1024**8), '1.00YiB')
+ self.assertEqual(format_bytes(1024**9), '1024.00YiB')
+
+ def test_hide_login_info(self):
+ self.assertEqual(Config.hide_login_info(['-u', 'foo', '-p', 'bar']),
+ ['-u', 'PRIVATE', '-p', 'PRIVATE'])
+ self.assertEqual(Config.hide_login_info(['-u']), ['-u'])
+ self.assertEqual(Config.hide_login_info(['-u', 'foo', '-u', 'bar']),
+ ['-u', 'PRIVATE', '-u', 'PRIVATE'])
+ self.assertEqual(Config.hide_login_info(['--username=foo']),
+ ['--username=PRIVATE'])
+
+ def test_locked_file(self):
+ TEXT = 'test_locked_file\n'
+ FILE = 'test_locked_file.ytdl'
+ MODES = 'war' # Order is important
+
+ try:
+ for lock_mode in MODES:
+ with locked_file(FILE, lock_mode, False) as f:
+ if lock_mode == 'r':
+ self.assertEqual(f.read(), TEXT * 2, 'Wrong file content')
+ else:
+ f.write(TEXT)
+ for test_mode in MODES:
+ testing_write = test_mode != 'r'
+ try:
+ with locked_file(FILE, test_mode, False):
+ pass
+ except (BlockingIOError, PermissionError):
+ if not testing_write: # FIXME
+ print(f'Known issue: Exclusive lock ({lock_mode}) blocks read access ({test_mode})')
+ continue
+ self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}')
+ else:
+ self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}')
+ finally:
+ with contextlib.suppress(OSError):
+ os.remove(FILE)
+
+ def test_determine_file_encoding(self):
+ self.assertEqual(determine_file_encoding(b''), (None, 0))
+ self.assertEqual(determine_file_encoding(b'--verbose -x --audio-format mkv\n'), (None, 0))
+
+ self.assertEqual(determine_file_encoding(b'\xef\xbb\xbf'), ('utf-8', 3))
+ self.assertEqual(determine_file_encoding(b'\x00\x00\xfe\xff'), ('utf-32-be', 4))
+ self.assertEqual(determine_file_encoding(b'\xff\xfe'), ('utf-16-le', 2))
+
+ self.assertEqual(determine_file_encoding(b'\xff\xfe# coding: utf-8\n--verbose'), ('utf-16-le', 2))
+
+ self.assertEqual(determine_file_encoding(b'# coding: utf-8\n--verbose'), ('utf-8', 0))
+ self.assertEqual(determine_file_encoding(b'# coding: someencodinghere-12345\n--verbose'), ('someencodinghere-12345', 0))
+
+ self.assertEqual(determine_file_encoding(b'#coding:utf-8\n--verbose'), ('utf-8', 0))
+ self.assertEqual(determine_file_encoding(b'# coding: utf-8 \r\n--verbose'), ('utf-8', 0))
+
+ self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0))
+ self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0))
+
+ def test_get_compatible_ext(self):
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None, None], vexts=['mp4'], aexts=['m4a', 'm4a']), 'mkv')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['flv'], aexts=['flv']), 'flv')
+
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['m4a']), 'mp4')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['webm']), 'mkv')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['weba']), 'webm')
+
+ self.assertEqual(get_compatible_ext(
+ vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=['av01.0.12M.08'], acodecs=['opus'], vexts=['mp4'], aexts=['webm']), 'webm')
+
+ self.assertEqual(get_compatible_ext(
+ vcodecs=['vp9'], acodecs=['opus'], vexts=['webm'], aexts=['webm'], preferences=['flv', 'mp4']), 'mp4')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=['av1'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'], preferences=('webm', 'mkv')), 'mkv')
+
+ def test_try_call(self):
+ def total(*x, **kwargs):
+ return sum(x) + sum(kwargs.values())
+
+ self.assertEqual(try_call(None), None,
+ msg='not a fn should give None')
+ self.assertEqual(try_call(lambda: 1), 1,
+ msg='int fn with no expected_type should give int')
+ self.assertEqual(try_call(lambda: 1, expected_type=int), 1,
+ msg='int fn with expected_type int should give int')
+ self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+ msg='int fn with wrong expected_type should give None')
+ self.assertEqual(try_call(total, args=(0, 1, 0, ), expected_type=int), 1,
+ msg='fn should accept arglist')
+ self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1,
+ msg='fn should accept kwargs')
+ self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+ msg='int fn with no expected_type should give None')
+ self.assertEqual(try_call(lambda x: {}, total, args=(42, ), expected_type=int), 42,
+ msg='expect first int result with expected_type int')
+
+ def test_variadic(self):
+ self.assertEqual(variadic(None), (None, ))
+ self.assertEqual(variadic('spam'), ('spam', ))
+ self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
+ with warnings.catch_warnings():
+ warnings.simplefilter('ignore')
+ self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
+
+ def test_http_header_dict(self):
+ headers = HTTPHeaderDict()
+ headers['ytdl-test'] = b'0'
+ self.assertEqual(list(headers.items()), [('Ytdl-Test', '0')])
+ headers['ytdl-test'] = 1
+ self.assertEqual(list(headers.items()), [('Ytdl-Test', '1')])
+ headers['Ytdl-test'] = '2'
+ self.assertEqual(list(headers.items()), [('Ytdl-Test', '2')])
+ self.assertTrue('ytDl-Test' in headers)
+ self.assertEqual(str(headers), str(dict(headers)))
+ self.assertEqual(repr(headers), str(dict(headers)))
+
+ headers.update({'X-dlp': 'data'})
+ self.assertEqual(set(headers.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data')})
+ self.assertEqual(dict(headers), {'Ytdl-Test': '2', 'X-Dlp': 'data'})
+ self.assertEqual(len(headers), 2)
+ self.assertEqual(headers.copy(), headers)
+ headers2 = HTTPHeaderDict({'X-dlp': 'data3'}, **headers, **{'X-dlp': 'data2'})
+ self.assertEqual(set(headers2.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data2')})
+ self.assertEqual(len(headers2), 2)
+ headers2.clear()
+ self.assertEqual(len(headers2), 0)
+
+ # ensure we prefer latter headers
+ headers3 = HTTPHeaderDict({'Ytdl-TeSt': 1}, {'Ytdl-test': 2})
+ self.assertEqual(set(headers3.items()), {('Ytdl-Test', '2')})
+ del headers3['ytdl-tesT']
+ self.assertEqual(dict(headers3), {})
+
+ headers4 = HTTPHeaderDict({'ytdl-test': 'data;'})
+ self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')})
+
+ # common mistake: strip whitespace from values
+ # https://github.com/yt-dlp/yt-dlp/issues/8729
+ headers5 = HTTPHeaderDict({'ytdl-test': ' data; '})
+ self.assertEqual(set(headers5.items()), {('Ytdl-Test', 'data;')})
+
+ def test_extract_basic_auth(self):
+ assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None)
+ assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None)
+ assert extract_basic_auth('http://@foo.bar') == ('http://foo.bar', 'Basic Og==')
+ assert extract_basic_auth('http://:pass@foo.bar') == ('http://foo.bar', 'Basic OnBhc3M=')
+ assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=')
+ assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')
+
+ @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
+ def test_windows_escaping(self):
+ tests = [
+ 'test"&',
+ '%CMDCMDLINE:~-1%&',
+ 'a\nb',
+ '"',
+ '\\',
+ '!',
+ '^!',
+ 'a \\ b',
+ 'a \\" b',
+ 'a \\ b\\',
+ # We replace \r with \n
+ ('a\r\ra', 'a\n\na'),
+ ]
+
+ def run_shell(args):
+ stdout, stderr, error = Popen.run(
+ args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ assert not stderr
+ assert not error
+ return stdout
+
+ for argument in tests:
+ if isinstance(argument, str):
+ expected = argument
+ else:
+ argument, expected = argument
+
+ args = [sys.executable, '-c', 'import sys; print(end=sys.argv[1])', argument, 'end']
+ assert run_shell(args) == expected
+ assert run_shell(shell_quote(args, shell=True)) == expected
+
if __name__ == '__main__':
unittest.main()