X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/86e5f3ed2e6e71eb81ea4c9e26288f16119ffd0c..61edf57f8f13f6dfd81154174e647eb5fdd26089:/yt_dlp/extractor/iqiyi.py diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index b755aab07..735b44637 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -2,22 +2,18 @@ import itertools import re import time +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, - compat_urllib_parse_unquote -) from .openload import PhantomJSwrapper from ..utils import ( + ExtractorError, clean_html, decode_packed_codes, - ExtractorError, float_or_none, format_field, - get_element_by_id, get_element_by_attribute, + get_element_by_id, int_or_none, js_to_json, ohdave_rsa_encrypt, @@ -34,10 +30,10 @@ def md5_text(text): - return hashlib.md5(text.encode('utf-8')).hexdigest() + return hashlib.md5(text.encode()).hexdigest() -class IqiyiSDK(object): +class IqiyiSDK: def __init__(self, target, ip, timestamp): self.target = target self.ip = ip @@ -45,17 +41,17 @@ def __init__(self, target, ip, timestamp): @staticmethod def split_sum(data): - return compat_str(sum(map(lambda p: int(p, 16), list(data)))) + return str(sum(int(p, 16) for p in data)) @staticmethod def digit_sum(num): if isinstance(num, int): - num = compat_str(num) - return compat_str(sum(map(int, num))) + num = str(num) + return str(sum(map(int, num))) def even_odd(self): - even = self.digit_sum(compat_str(self.timestamp)[::2]) - odd = self.digit_sum(compat_str(self.timestamp)[1::2]) + even = self.digit_sum(str(self.timestamp)[::2]) + odd = self.digit_sum(str(self.timestamp)[1::2]) return even, odd def preprocess(self, chunksize): @@ -69,7 +65,7 @@ def preprocess(self, chunksize): def mod(self, modulus): chunks, ip = self.preprocess(32) - self.target = chunks[0] + ''.join(map(lambda p: compat_str(p % modulus), ip)) + self.target = chunks[0] + ''.join(str(p % modulus) for p in ip) def split(self, chunksize): modulus_map = { @@ -81,7 +77,7 @@ def split(self, chunksize): chunks, ip = self.preprocess(chunksize) ret = '' for i in range(len(chunks)): - ip_part = compat_str(ip[i] % modulus_map[chunksize]) if i < 4 else '' + ip_part = str(ip[i] % modulus_map[chunksize]) if i < 4 else '' if chunksize == 8: ret += ip_part + chunks[i] else: @@ -108,11 +104,11 @@ def date(self, scheme): self.target = md5_text(self.target) d = time.localtime(self.timestamp) strings = { - 'y': compat_str(d.tm_year), + 'y': str(d.tm_year), 'm': '%02d' % d.tm_mon, 'd': '%02d' % d.tm_mday, } - self.target += ''.join(map(lambda c: strings[c], list(scheme))) + self.target += ''.join(strings[c] for c in scheme) def split_time_even_odd(self): even, odd = self.even_odd() @@ -124,14 +120,14 @@ def split_time_odd_even(self): def split_ip_time_sum(self): chunks, ip = self.preprocess(32) - self.target = compat_str(sum(ip)) + chunks[0] + self.digit_sum(self.timestamp) + self.target = str(sum(ip)) + chunks[0] + self.digit_sum(self.timestamp) def split_time_ip_sum(self): chunks, ip = self.preprocess(32) - self.target = self.digit_sum(self.timestamp) + chunks[0] + compat_str(sum(ip)) + self.target = self.digit_sum(self.timestamp) + chunks[0] + str(sum(ip)) -class IqiyiSDKInterpreter(object): +class IqiyiSDKInterpreter: def __init__(self, sdk_code): self.sdk_code = sdk_code @@ -161,7 +157,7 @@ def run(self, target, ip, timestamp): elif function in other_functions: other_functions[function]() else: - raise ExtractorError('Unknown function %s' % function) + raise ExtractorError(f'Unknown function {function}') return sdk.target @@ -181,7 +177,7 @@ class IqiyiIE(InfoExtractor): 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', 'ext': 'mp4', 'title': '美国德州空中惊现奇异云团 酷似UFO', - } + }, }, { 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html', 'md5': 'b7dc800a4004b1b57749d9abae0472da', @@ -253,8 +249,9 @@ def _perform_login(self, username, password): note='Get token for logging', errnote='Unable to get token for logging') sdk = data['sdk'] timestamp = int(time.time()) - target = '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % ( - username, self._rsa_fun(password.encode('utf-8'))) + target = ( + f'/apis/reglogin/login.action?lang=zh_TW&area_code=null&email={username}' + f'&passwd={self._rsa_fun(password.encode())}&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1') interp = IqiyiSDKInterpreter(sdk) sign = interp.run(target, data['ip'], timestamp) @@ -268,7 +265,7 @@ def _perform_login(self, username, password): 'bird_t': timestamp, } validation_result = self._download_json( - 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params), None, + 'http://kylin.iqiyi.com/validate?' + urllib.parse.urlencode(validation_params), None, note='Validate credentials', errnote='Unable to validate credentials') MSG_MAP = { @@ -280,7 +277,7 @@ def _perform_login(self, username, password): if code != 'A00000': msg = MSG_MAP.get(code) if not msg: - msg = 'error %s' % code + msg = f'error {code}' if validation_result.get('msg'): msg += ': ' + validation_result['msg'] self.report_warning('unable to log in: ' + msg) @@ -292,7 +289,7 @@ def get_raw_data(self, tvid, video_id): tm = int(time.time() * 1000) key = 'd5fb4bd9d50c4be6948c97edd7254b0e' - sc = md5_text(compat_str(tm) + key + tvid) + sc = md5_text(str(tm) + key + tvid) params = { 'tvid': tvid, 'vid': video_id, @@ -302,7 +299,7 @@ def get_raw_data(self, tvid, video_id): } return self._download_json( - 'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id), + f'http://cache.m.iqiyi.com/jp/tmts/{tvid}/{video_id}/', video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='), query=params, headers=self.geo_verification_headers()) @@ -325,10 +322,10 @@ def _extract_playlist(self, webpage): # Start from 2 because links in the first page are already on webpage for page_num in itertools.count(2): pagelist_page = self._download_webpage( - 'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id, page_num, PAGE_SIZE), + f'http://cache.video.qiyi.com/jp/avlist/{album_id}/{page_num}/{PAGE_SIZE}/', album_id, - note='Download playlist page %d' % page_num, - errnote='Failed to download playlist page %d' % page_num) + note=f'Download playlist page {page_num}', + errnote=f'Failed to download playlist page {page_num}') pagelist = self._parse_json( remove_start(pagelist_page, 'var tvInfoJs='), album_id) vlist = pagelist['data']['vlist'] @@ -371,7 +368,7 @@ def _real_extract(self, url): for stream in data['vidl']: if 'm3utx' not in stream: continue - vd = compat_str(stream['vd']) + vd = str(stream['vd']) formats.append({ 'url': stream['m3utx'], 'format_id': vd, @@ -385,7 +382,6 @@ def _real_extract(self, url): self._sleep(5, video_id) - self._sort_formats(formats) title = (get_element_by_id('widget-videotitle', webpage) or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)) or self._html_search_regex(r']+data-videochanged-title="word"[^>]*>([^<]+)', webpage, 'title')) @@ -421,11 +417,11 @@ class IqIE(InfoExtractor): 'params': { 'format': '500', }, - 'expected_warnings': ['format is restricted'] + 'expected_warnings': ['format is restricted'], }, { # VIP-restricted video 'url': 'https://www.iq.com/play/mermaid-in-the-fog-2021-gbdpx13bs4', - 'only_matching': True + 'only_matching': True, }] _BID_TAGS = { '100': '240P', @@ -441,11 +437,14 @@ class IqIE(InfoExtractor): '1': 'zh_CN', '2': 'zh_TW', '3': 'en', + '4': 'ko', + '5': 'ja', '18': 'th', '21': 'my', '23': 'vi', '24': 'id', '26': 'es', + '27': 'pt', '28': 'ar', } @@ -497,9 +496,10 @@ class IqIE(InfoExtractor): 'tm': tm, 'qdy': 'a', 'qds': 0, - 'k_ft1': 141287244169348, - 'k_ft4': 34359746564, - 'k_ft5': 1, + 'k_ft1': '143486267424900', + 'k_ft4': '1572868', + 'k_ft7': '4', + 'k_ft5': '1', 'bop': JSON.stringify({ 'version': '10.0', 'dfp': dfp @@ -521,20 +521,31 @@ class IqIE(InfoExtractor): ''' def _extract_vms_player_js(self, webpage, video_id): - player_js_cache = self._downloader.cache.load('iq', 'player_js') + player_js_cache = self.cache.load('iq', 'player_js') if player_js_cache: return player_js_cache webpack_js_url = self._proto_relative_url(self._search_regex( - r'