X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/cefecac12cd3c70f9c7a30992c60b05c2eb5d34e..1c1b2f96ae9696ef16b1b27d1a007bf89c683a0c:/test/test_utils.py diff --git a/test/test_utils.py b/test/test_utils.py index 663a34e07..31f168998 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # coding: utf-8 from __future__ import unicode_literals @@ -12,16 +12,20 @@ # Various small unit tests import io +import itertools import json import xml.etree.ElementTree -from youtube_dlc.utils import ( +from yt_dlp.utils import ( age_restricted, args_to_str, encode_base_n, caesar, clean_html, + clean_podcast_url, + Config, date_from_str, + datetime_from_str, DateRange, detect_exe_version, determine_ext, @@ -34,11 +38,18 @@ ExtractorError, find_xpath_attr, fix_xml_ampersands, + format_bytes, float_or_none, get_element_by_class, get_element_by_attribute, get_elements_by_class, get_elements_by_attribute, + get_element_html_by_class, + get_element_html_by_attribute, + get_elements_html_by_class, + get_elements_html_by_attribute, + get_elements_text_and_html_by_attribute, + get_element_text_and_html_by_tag, InAdvancePagedList, int_or_none, intlist_to_bytes, @@ -59,11 +70,13 @@ parse_iso8601, parse_resolution, parse_bitrate, + parse_qs, pkcs1pad, read_batch_urls, sanitize_filename, sanitize_path, sanitize_url, + sanitized_Request, expand_path, prepend_extension, replace_extension, @@ -104,15 +117,16 @@ cli_valueless_option, cli_bool_option, parse_codecs, + iri_to_uri, + LazyList, ) -from youtube_dlc.compat import ( +from yt_dlp.compat import ( compat_chr, compat_etree_fromstring, compat_getenv, + compat_HTMLParseError, compat_os_name, compat_setenv, - compat_urlparse, - compat_parse_qs, ) @@ -122,6 +136,7 @@ def test_timeconvert(self): self.assertTrue(timeconvert('bougrg') is None) def test_sanitize_filename(self): + self.assertEqual(sanitize_filename(''), '') self.assertEqual(sanitize_filename('abc'), 'abc') self.assertEqual(sanitize_filename('abc_d-e'), 'abc_d-e') @@ -145,10 +160,12 @@ def test_sanitize_filename(self): sanitize_filename('New World record at 0:12:34'), 'New World record at 0_12_34') - self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf') + self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf') self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf') - self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf') + self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf') + self.assertEqual(sanitize_filename('.gasdgf'), '.gasdgf') self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf') + self.assertEqual(sanitize_filename('.gasdgf', is_id=False), 'gasdgf') forbidden = '"\0\\/' for fc in forbidden: @@ -235,17 +252,27 @@ def test_sanitize_url(self): self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar') self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar') self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar') + self.assertEqual(sanitize_url('foo bar'), 'foo bar') + + def test_extract_basic_auth(self): + auth_header = lambda url: sanitized_Request(url).get_header('Authorization') + self.assertFalse(auth_header('http://foo.bar')) + self.assertFalse(auth_header('http://:foo.bar')) + self.assertEqual(auth_header('http://@foo.bar'), 'Basic Og==') + self.assertEqual(auth_header('http://:pass@foo.bar'), 'Basic OnBhc3M=') + self.assertEqual(auth_header('http://user:@foo.bar'), 'Basic dXNlcjo=') + self.assertEqual(auth_header('http://user:pass@foo.bar'), 'Basic dXNlcjpwYXNz') def test_expand_path(self): def env(var): return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var) - compat_setenv('youtube_dlc_EXPATH_PATH', 'expanded') - self.assertEqual(expand_path(env('youtube_dlc_EXPATH_PATH')), 'expanded') + compat_setenv('yt_dlp_EXPATH_PATH', 'expanded') + self.assertEqual(expand_path(env('yt_dlp_EXPATH_PATH')), 'expanded') self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME')) self.assertEqual(expand_path('~'), compat_getenv('HOME')) self.assertEqual( - expand_path('~/%s' % env('youtube_dlc_EXPATH_PATH')), + expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')), '%s/expanded' % compat_getenv('HOME')) def test_prepend_extension(self): @@ -309,8 +336,18 @@ def test_date_from_str(self): self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day')) self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week')) self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week')) - self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year')) - self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month')) + self.assertEqual(date_from_str('20200229+365day'), date_from_str('20200229+1year')) + self.assertEqual(date_from_str('20210131+28day'), date_from_str('20210131+1month')) + + def test_datetime_from_str(self): + self.assertEqual(datetime_from_str('yesterday', precision='day'), datetime_from_str('now-1day', precision='auto')) + self.assertEqual(datetime_from_str('now+7day', precision='day'), datetime_from_str('now+1week', precision='auto')) + self.assertEqual(datetime_from_str('now+14day', precision='day'), datetime_from_str('now+2week', precision='auto')) + self.assertEqual(datetime_from_str('20200229+365day', precision='day'), datetime_from_str('20200229+1year', precision='auto')) + self.assertEqual(datetime_from_str('20210131+28day', precision='day'), datetime_from_str('20210131+1month', precision='auto')) + self.assertEqual(datetime_from_str('20210131+59day', precision='day'), datetime_from_str('20210131+2month', precision='auto')) + self.assertEqual(datetime_from_str('now+1day', precision='hour'), datetime_from_str('now+24hours', precision='auto')) + self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto')) def test_daterange(self): _20century = DateRange("19000101", "20000101") @@ -554,6 +591,11 @@ def test_url_or_none(self): self.assertEqual(url_or_none('http$://foo.de'), None) self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de') self.assertEqual(url_or_none('//foo.de'), '//foo.de') + self.assertEqual(url_or_none('s3://foo.de'), None) + self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de') + self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de') + self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de') + self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de') def test_parse_age_limit(self): self.assertEqual(parse_age_limit(None), None) @@ -585,6 +627,8 @@ def test_parse_duration(self): self.assertEqual(parse_duration('3h 11m 53s'), 11513) self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513) self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513) + self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513) + self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513) self.assertEqual(parse_duration('62m45s'), 3765) self.assertEqual(parse_duration('6m59s'), 419) self.assertEqual(parse_duration('49s'), 49) @@ -603,6 +647,8 @@ def test_parse_duration(self): self.assertEqual(parse_duration('PT1H0.040S'), 3600.04) self.assertEqual(parse_duration('PT00H03M30SZ'), 210) self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88) + self.assertEqual(parse_duration('01:02:03:050'), 3723.05) + self.assertEqual(parse_duration('103:050'), 103.05) def test_fix_xml_ampersands(self): self.assertEqual( @@ -656,38 +702,36 @@ def test_urlencode_postdata(self): self.assertTrue(isinstance(data, bytes)) def test_update_url_query(self): - def query_dict(url): - return compat_parse_qs(compat_urlparse.urlparse(url).query) - self.assertEqual(query_dict(update_url_query( + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})), - query_dict('http://example.com/path?quality=HD&format=mp4')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?quality=HD&format=mp4')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})), - query_dict('http://example.com/path?system=LINUX&system=WINDOWS')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?system=LINUX&system=WINDOWS')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'fields': 'id,formats,subtitles'})), - query_dict('http://example.com/path?fields=id,formats,subtitles')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?fields=id,formats,subtitles')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})), - query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path?manifest=f4m', {'manifest': []})), - query_dict('http://example.com/path')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})), - query_dict('http://example.com/path?system=LINUX')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?system=LINUX')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'fields': b'id,formats,subtitles'})), - query_dict('http://example.com/path?fields=id,formats,subtitles')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?fields=id,formats,subtitles')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'width': 1080, 'height': 720})), - query_dict('http://example.com/path?width=1080&height=720')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?width=1080&height=720')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'bitrate': 5020.43})), - query_dict('http://example.com/path?bitrate=5020.43')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?bitrate=5020.43')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'test': '第二行тест'})), - query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) + parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) def test_multipart_encode(self): self.assertEqual( @@ -803,6 +847,8 @@ def test_mimetype2ext(self): self.assertEqual(mimetype2ext('text/vtt'), 'vtt') self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') + self.assertEqual(mimetype2ext('audio/x-wav'), 'wav') + self.assertEqual(mimetype2ext('audio/x-wav;codec=pcm'), 'wav') def test_month_by_name(self): self.assertEqual(month_by_name(None), None) @@ -817,30 +863,52 @@ def test_parse_codecs(self): self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { 'vcodec': 'avc1.77.30', 'acodec': 'mp4a.40.2', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('mp4a.40.2'), { 'vcodec': 'none', 'acodec': 'mp4a.40.2', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), { 'vcodec': 'avc1.42001e', 'acodec': 'mp4a.40.5', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('avc3.640028'), { 'vcodec': 'avc3.640028', 'acodec': 'none', + 'dynamic_range': None, }) self.assertEqual(parse_codecs(', h264,,newcodec,aac'), { 'vcodec': 'h264', 'acodec': 'aac', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('av01.0.05M.08'), { 'vcodec': 'av01.0.05M.08', 'acodec': 'none', + 'dynamic_range': None, + }) + self.assertEqual(parse_codecs('vp9.2'), { + 'vcodec': 'vp9.2', + 'acodec': 'none', + 'dynamic_range': 'HDR10', + }) + self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), { + 'vcodec': 'av01.0.12M.10', + 'acodec': 'none', + 'dynamic_range': 'HDR10', + }) + self.assertEqual(parse_codecs('dvhe'), { + 'vcodec': 'dvhe', + 'acodec': 'none', + 'dynamic_range': 'DV', }) self.assertEqual(parse_codecs('theora, vorbis'), { 'vcodec': 'theora', 'acodec': 'vorbis', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), { 'vcodec': 'unknownvcodec', @@ -935,6 +1003,28 @@ def test_js_to_json_edgecases(self): self.assertEqual(d['x'], 1) self.assertEqual(d['y'], 'a') + # Just drop ! prefix for now though this results in a wrong value + on = js_to_json('''{ + a: !0, + b: !1, + c: !!0, + d: !!42.42, + e: !!![], + f: !"abc", + g: !"", + !42: 42 + }''') + self.assertEqual(json.loads(on), { + 'a': 0, + 'b': 1, + 'c': 0, + 'd': 42.42, + 'e': [], + 'f': "abc", + 'g': "", + '42': 42 + }) + on = js_to_json('["abc", "def",]') self.assertEqual(json.loads(on), ['abc', 'def']) @@ -992,6 +1082,15 @@ def test_js_to_json_edgecases(self): on = js_to_json('{42:4.2e1}') self.assertEqual(json.loads(on), {'42': 42.0}) + on = js_to_json('{ "0x40": "0x40" }') + self.assertEqual(json.loads(on), {'0x40': '0x40'}) + + on = js_to_json('{ "040": "040" }') + self.assertEqual(json.loads(on), {'040': '040'}) + + on = js_to_json('[1,//{},\n2]') + self.assertEqual(json.loads(on), [1, 2]) + def test_js_to_json_malformed(self): self.assertEqual(js_to_json('42a1'), '42"a1"') self.assertEqual(js_to_json('42a-1'), '42"a"-1') @@ -1038,7 +1137,7 @@ def test_extract_attributes(self): def test_clean_html(self): self.assertEqual(clean_html('a:\nb'), 'a: b') - self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') + self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') self.assertEqual(clean_html('a
\xa0b'), 'a\nb') def test_intlist_to_bytes(self): @@ -1072,19 +1171,29 @@ def test_parse_count(self): self.assertEqual(parse_count('1000'), 1000) self.assertEqual(parse_count('1.000'), 1000) self.assertEqual(parse_count('1.1k'), 1100) + self.assertEqual(parse_count('1.1 k'), 1100) + self.assertEqual(parse_count('1,1 k'), 1100) self.assertEqual(parse_count('1.1kk'), 1100000) self.assertEqual(parse_count('1.1kk '), 1100000) + self.assertEqual(parse_count('1,1kk'), 1100000) + self.assertEqual(parse_count('100 views'), 100) + self.assertEqual(parse_count('1,100 views'), 1100) self.assertEqual(parse_count('1.1kk views'), 1100000) + self.assertEqual(parse_count('10M views'), 10000000) + self.assertEqual(parse_count('has 10M views'), 10000000) def test_parse_resolution(self): self.assertEqual(parse_resolution(None), {}) self.assertEqual(parse_resolution(''), {}) - self.assertEqual(parse_resolution('1920x1080'), {'width': 1920, 'height': 1080}) - self.assertEqual(parse_resolution('1920×1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution(' 1920x1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920×1080 '), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('720p'), {'height': 720}) self.assertEqual(parse_resolution('4k'), {'height': 2160}) self.assertEqual(parse_resolution('8K'), {'height': 4320}) + self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('ep1x2'), {}) + self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) def test_parse_bitrate(self): self.assertEqual(parse_bitrate(None), None) @@ -1133,21 +1242,72 @@ def test_is_html(self): b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00')) def test_render_table(self): + self.assertEqual( + render_table( + ['a', 'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]]), + 'a empty bcd\n' + '123 4\n' + '9999 51') + + self.assertEqual( + render_table( + ['a', 'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]], + hide_empty=True), + 'a bcd\n' + '123 4\n' + '9999 51') + + self.assertEqual( + render_table( + ['\ta', 'bcd'], + [['1\t23', 4], ['\t9999', 51]]), + ' a bcd\n' + '1 23 4\n' + '9999 51') + self.assertEqual( render_table( ['a', 'bcd'], - [[123, 4], [9999, 51]]), + [[123, 4], [9999, 51]], + delim='-'), 'a bcd\n' + '--------\n' '123 4\n' '9999 51') + self.assertEqual( + render_table( + ['a', 'bcd'], + [[123, 4], [9999, 51]], + delim='-', extra_gap=2), + 'a bcd\n' + '----------\n' + '123 4\n' + '9999 51') + def test_match_str(self): - self.assertRaises(ValueError, match_str, 'xy>foobar', {}) + # Unary self.assertFalse(match_str('xy', {'x': 1200})) self.assertTrue(match_str('!xy', {'x': 1200})) self.assertTrue(match_str('x', {'x': 1200})) self.assertFalse(match_str('!x', {'x': 1200})) self.assertTrue(match_str('x', {'x': 0})) + self.assertTrue(match_str('is_live', {'is_live': True})) + self.assertFalse(match_str('is_live', {'is_live': False})) + self.assertFalse(match_str('is_live', {'is_live': None})) + self.assertFalse(match_str('is_live', {})) + self.assertFalse(match_str('!is_live', {'is_live': True})) + self.assertTrue(match_str('!is_live', {'is_live': False})) + self.assertTrue(match_str('!is_live', {'is_live': None})) + self.assertTrue(match_str('!is_live', {})) + self.assertTrue(match_str('title', {'title': 'abc'})) + self.assertTrue(match_str('title', {'title': ''})) + self.assertFalse(match_str('!title', {'title': 'abc'})) + self.assertFalse(match_str('!title', {'title': ''})) + + # Numeric self.assertFalse(match_str('x>0', {'x': 0})) self.assertFalse(match_str('x>0', {})) self.assertTrue(match_str('x>?0', {})) @@ -1155,10 +1315,26 @@ def test_match_str(self): self.assertFalse(match_str('x>2K', {'x': 1200})) self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200})) self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200})) + self.assertTrue(match_str('x > 1:0:0', {'x': 3700})) + + # String self.assertFalse(match_str('y=a212', {'y': 'foobar42'})) self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) + self.assertTrue(match_str('y^=foo', {'y': 'foobar42'})) + self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'})) + self.assertFalse(match_str('y^=bar', {'y': 'foobar42'})) + self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'})) + self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42}) + self.assertTrue(match_str('y*=bar', {'y': 'foobar42'})) + self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'})) + self.assertFalse(match_str('y*=baz', {'y': 'foobar42'})) + self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'})) + self.assertTrue(match_str('y$=42', {'y': 'foobar42'})) + self.assertFalse(match_str('y$=43', {'y': 'foobar42'})) + + # And self.assertFalse(match_str( 'like_count > 100 & dislike_count 100 & dislike_count ?100 & description~='(?i)\bcats \& dogs\b'", + {'description': 'Raining Cats & Dogs'})) + + # Incomplete + self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True)) + self.assertTrue(match_str('x', {'id': 'foo'}, True)) + self.assertTrue(match_str('!x', {'id': 'foo'}, True)) + self.assertFalse(match_str('x', {'id': 'foo'}, False)) def test_parse_dfxp_time_expr(self): self.assertEqual(parse_dfxp_time_expr(None), None) @@ -1259,21 +1452,21 @@ def test_dfxp2srt(self): '''.encode('utf-8') srt_data = '''1 -00:00:02,080 --> 00:00:05,839 +00:00:02,080 --> 00:00:05,840 default stylecustom style 2 -00:00:02,080 --> 00:00:05,839 +00:00:02,080 --> 00:00:05,840 part 1 part 2 3 -00:00:05,839 --> 00:00:09,560 +00:00:05,840 --> 00:00:09,560 line 3 part 3 4 -00:00:09,560 --> 00:00:12,359 +00:00:09,560 --> 00:00:12,360 inner style @@ -1388,53 +1581,220 @@ def test_caesar(self): self.assertEqual(caesar('ebg', 'acegik', -2), 'abc') def test_rot47(self): - self.assertEqual(rot47('youtube-dlc'), r'J@FEF36\5=') - self.assertEqual(rot47('youtube-dlc'), r'*~&%&qt\s{') + self.assertEqual(rot47('yt-dlp'), r'JE\5=A') + self.assertEqual(rot47('YT-DLP'), r'*%\s{!') def test_urshift(self): self.assertEqual(urshift(3, 1), 1) self.assertEqual(urshift(-3, 1), 2147483646) + GET_ELEMENT_BY_CLASS_TEST_STRING = ''' + nice + ''' + def test_get_element_by_class(self): - html = ''' - nice - ''' + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING self.assertEqual(get_element_by_class('foo', html), 'nice') self.assertEqual(get_element_by_class('no-such-class', html), None) + def test_get_element_html_by_class(self): + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING + + self.assertEqual(get_element_html_by_class('foo', html), html.strip()) + self.assertEqual(get_element_by_class('no-such-class', html), None) + + GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING = ''' + + ''' + def test_get_element_by_attribute(self): - html = ''' - nice - ''' + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice') self.assertEqual(get_element_by_attribute('class', 'foo', html), None) self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None) - html = ''' - - ''' + html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo') + def test_get_element_html_by_attribute(self): + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING + + self.assertEqual(get_element_html_by_attribute('class', 'foo bar', html), html.strip()) + self.assertEqual(get_element_html_by_attribute('class', 'foo', html), None) + self.assertEqual(get_element_html_by_attribute('class', 'no-such-foo', html), None) + + html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING + + self.assertEqual(get_element_html_by_attribute('itemprop', 'author', html), html.strip()) + + GET_ELEMENTS_BY_CLASS_TEST_STRING = ''' + nicealso nice + ''' + GET_ELEMENTS_BY_CLASS_RES = ['nice', 'also nice'] + def test_get_elements_by_class(self): - html = ''' - nicealso nice - ''' + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice']) self.assertEqual(get_elements_by_class('no-such-class', html), []) + def test_get_elements_html_by_class(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual(get_elements_html_by_class('foo', html), self.GET_ELEMENTS_BY_CLASS_RES) + self.assertEqual(get_elements_html_by_class('no-such-class', html), []) + def test_get_elements_by_attribute(self): - html = ''' - nicealso nice - ''' + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice']) self.assertEqual(get_elements_by_attribute('class', 'foo', html), []) self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), []) + def test_get_elements_html_by_attribute(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual(get_elements_html_by_attribute('class', 'foo bar', html), self.GET_ELEMENTS_BY_CLASS_RES) + self.assertEqual(get_elements_html_by_attribute('class', 'foo', html), []) + self.assertEqual(get_elements_html_by_attribute('class', 'no-such-foo', html), []) + + def test_get_elements_text_and_html_by_attribute(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual( + list(get_elements_text_and_html_by_attribute('class', 'foo bar', html)), + list(zip(['nice', 'also nice'], self.GET_ELEMENTS_BY_CLASS_RES))) + self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'foo', html)), []) + self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'no-such-foo', html)), []) + + GET_ELEMENT_BY_TAG_TEST_STRING = ''' + random text lorem ipsum

+
+ this should be returned + this should also be returned +
+ this should also be returned +
+ closing tag above should not trick, so this should also be returned +
+ but this text should not be returned + ''' + GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[32:276] + GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT = GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML[5:-6] + GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119] + GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7] + + def test_get_element_text_and_html_by_tag(self): + html = self.GET_ELEMENT_BY_TAG_TEST_STRING + + self.assertEqual( + get_element_text_and_html_by_tag('div', html), + (self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT, self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML)) + self.assertEqual( + get_element_text_and_html_by_tag('span', html), + (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML)) + self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html) + + def test_iri_to_uri(self): + self.assertEqual( + iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'), + 'https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b') # Same + self.assertEqual( + iri_to_uri('https://www.google.com/search?q=Käsesoßenrührlöffel'), # German for cheese sauce stirring spoon + 'https://www.google.com/search?q=K%C3%A4seso%C3%9Fenr%C3%BChrl%C3%B6ffel') + self.assertEqual( + iri_to_uri('https://www.google.com/search?q=lt<+gt>+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#'), + 'https://www.google.com/search?q=lt%3C+gt%3E+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#') + self.assertEqual( + iri_to_uri('http://правозащита38.рф/category/news/'), + 'http://xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/') + self.assertEqual( + iri_to_uri('http://www.правозащита38.рф/category/news/'), + 'http://www.xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/') + self.assertEqual( + iri_to_uri('https://i❤.ws/emojidomain/👍👏🤝💪'), + 'https://xn--i-7iq.ws/emojidomain/%F0%9F%91%8D%F0%9F%91%8F%F0%9F%A4%9D%F0%9F%92%AA') + self.assertEqual( + iri_to_uri('http://日本語.jp/'), + 'http://xn--wgv71a119e.jp/') + self.assertEqual( + iri_to_uri('http://导航.中国/'), + 'http://xn--fet810g.xn--fiqs8s/') + + def test_clean_podcast_url(self): + self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3') + self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3') + + def test_LazyList(self): + it = list(range(10)) + + self.assertEqual(list(LazyList(it)), it) + self.assertEqual(LazyList(it).exhaust(), it) + self.assertEqual(LazyList(it)[5], it[5]) + + self.assertEqual(LazyList(it)[5:], it[5:]) + self.assertEqual(LazyList(it)[:5], it[:5]) + self.assertEqual(LazyList(it)[::2], it[::2]) + self.assertEqual(LazyList(it)[1::2], it[1::2]) + self.assertEqual(LazyList(it)[5::-1], it[5::-1]) + self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2]) + self.assertEqual(LazyList(it)[::-1], it[::-1]) + + self.assertTrue(LazyList(it)) + self.assertFalse(LazyList(range(0))) + self.assertEqual(len(LazyList(it)), len(it)) + self.assertEqual(repr(LazyList(it)), repr(it)) + self.assertEqual(str(LazyList(it)), str(it)) + + self.assertEqual(list(LazyList(it, reverse=True)), it[::-1]) + self.assertEqual(list(reversed(LazyList(it))[::-1]), it) + self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7]) + + def test_LazyList_laziness(self): + + def test(ll, idx, val, cache): + self.assertEqual(ll[idx], val) + self.assertEqual(getattr(ll, '_LazyList__cache'), list(cache)) + + ll = LazyList(range(10)) + test(ll, 0, 0, range(1)) + test(ll, 5, 5, range(6)) + test(ll, -3, 7, range(10)) + + ll = LazyList(range(10), reverse=True) + test(ll, -1, 0, range(1)) + test(ll, 3, 6, range(10)) + + ll = LazyList(itertools.count()) + test(ll, 10, 10, range(11)) + ll = reversed(ll) + test(ll, -15, 14, range(15)) + + def test_format_bytes(self): + self.assertEqual(format_bytes(0), '0.00B') + self.assertEqual(format_bytes(1000), '1000.00B') + self.assertEqual(format_bytes(1024), '1.00KiB') + self.assertEqual(format_bytes(1024**2), '1.00MiB') + self.assertEqual(format_bytes(1024**3), '1.00GiB') + self.assertEqual(format_bytes(1024**4), '1.00TiB') + self.assertEqual(format_bytes(1024**5), '1.00PiB') + self.assertEqual(format_bytes(1024**6), '1.00EiB') + self.assertEqual(format_bytes(1024**7), '1.00ZiB') + self.assertEqual(format_bytes(1024**8), '1.00YiB') + self.assertEqual(format_bytes(1024**9), '1024.00YiB') + + def test_hide_login_info(self): + self.assertEqual(Config.hide_login_info(['-u', 'foo', '-p', 'bar']), + ['-u', 'PRIVATE', '-p', 'PRIVATE']) + self.assertEqual(Config.hide_login_info(['-u']), ['-u']) + self.assertEqual(Config.hide_login_info(['-u', 'foo', '-u', 'bar']), + ['-u', 'PRIVATE', '-u', 'PRIVATE']) + self.assertEqual(Config.hide_login_info(['--username=foo']), + ['--username=PRIVATE']) + if __name__ == '__main__': unittest.main()