]>
Commit | Line | Data |
---|---|---|
605ec701 | 1 | # coding: utf-8 |
605ec701 P |
2 | from __future__ import unicode_literals |
3 | ||
958d0b65 | 4 | import hashlib |
73f9c286 | 5 | import itertools |
99709cc3 | 6 | import re |
605ec701 | 7 | import time |
958d0b65 YCH |
8 | |
9 | from .common import InfoExtractor | |
8e0548e1 | 10 | from ..compat import ( |
99709cc3 | 11 | compat_str, |
15707c7e | 12 | compat_urllib_parse_urlencode, |
8e0548e1 YCH |
13 | ) |
14 | from ..utils import ( | |
2644e911 | 15 | clean_html, |
f52354a8 | 16 | decode_packed_codes, |
2644e911 YCH |
17 | get_element_by_id, |
18 | get_element_by_attribute, | |
8e0548e1 | 19 | ExtractorError, |
99709cc3 | 20 | ohdave_rsa_encrypt, |
73f9c286 | 21 | remove_start, |
8e0548e1 | 22 | ) |
605ec701 | 23 | |
f1da8610 | 24 | |
99709cc3 YCH |
def md5_text(text):
    """Return the hexadecimal MD5 digest of *text*.

    The text is encoded as UTF-8 before hashing, so *text* must be a
    text (unicode) string rather than bytes.
    """
    return hashlib.md5(text.encode('utf-8')).hexdigest()
27 | ||
28 | ||
class IqiyiSDK(object):
    """String transformations used to compute the iqiyi login signature.

    An instance carries a mutable ``target`` string together with the
    client ``ip`` (dotted decimal string) and a ``timestamp`` (integer
    seconds). Every transformation method rewrites ``self.target`` in place
    and returns nothing; the final value of ``target`` is the signature.
    """

    def __init__(self, target, ip, timestamp):
        self.target = target
        self.ip = ip
        self.timestamp = timestamp

    @staticmethod
    def split_sum(data):
        """Return, as a string, the sum of the hexadecimal digits in *data*."""
        return compat_str(sum(int(digit, 16) for digit in data))

    @staticmethod
    def digit_sum(num):
        """Return, as a string, the sum of the decimal digits of *num*.

        *num* may be an int or a string of decimal digits.
        """
        if isinstance(num, int):
            num = compat_str(num)
        return compat_str(sum(map(int, num)))

    def even_odd(self):
        """Return digit sums of the even- and odd-indexed timestamp digits.

        Returns a ``(even, odd)`` tuple of strings, where indices are
        0-based positions in the decimal representation of the timestamp.
        """
        digits = compat_str(self.timestamp)
        return self.digit_sum(digits[::2]), self.digit_sum(digits[1::2])

    def preprocess(self, chunksize):
        """Hash the target and split the digest into chunks.

        Replaces ``self.target`` with its MD5 hex digest (32 characters)
        and returns ``(chunks, ip)`` where *chunks* is the digest cut into
        pieces of *chunksize* characters (plus one shorter trailing piece
        when 32 is not a multiple of *chunksize*) and *ip* is the list of
        integer components of ``self.ip``.
        """
        digest = self.target = md5_text(self.target)
        full_chunks, remainder = divmod(32, chunksize)
        chunks = [
            digest[chunksize * i:chunksize * (i + 1)]
            for i in range(full_chunks)]
        if remainder:
            chunks.append(digest[32 - remainder:])
        return chunks, list(map(int, self.ip.split('.')))

    def mod(self, modulus):
        """Set target to its MD5 digest followed by each IP part mod *modulus*."""
        chunks, ip = self.preprocess(32)
        self.target = chunks[0] + ''.join(
            compat_str(part % modulus) for part in ip)

    def split(self, chunksize):
        """Interleave digest chunks of *chunksize* with reduced IP parts.

        Each of the first four chunks is paired with the IP component of
        the same index, reduced modulo a value that depends on
        *chunksize*. For a chunk size of 8 the IP part precedes its chunk,
        otherwise it follows it. Raises KeyError for a chunk size other
        than 4, 5 or 8.
        """
        # chunk size -> modulus applied to the IP components
        modulus_map = {
            4: 256,
            5: 10,
            8: 100,
        }

        chunks, ip = self.preprocess(chunksize)
        pieces = []
        for i, chunk in enumerate(chunks):
            # Only four IP components exist; later chunks get no IP part
            ip_part = compat_str(ip[i] % modulus_map[chunksize]) if i < 4 else ''
            if chunksize == 8:
                pieces.append(ip_part + chunk)
            else:
                pieces.append(chunk + ip_part)
        self.target = ''.join(pieces)

    def handle_input16(self):
        """Wrap the MD5 digest with the hex digit sums of its two halves."""
        digest = md5_text(self.target)
        self.target = self.split_sum(digest[:16]) + digest + self.split_sum(digest[16:])

    def handle_input8(self):
        """Prefix each 8-character quarter of the MD5 digest with its hex digit sum."""
        digest = md5_text(self.target)
        quarters = (digest[8 * i:8 * (i + 1)] for i in range(4))
        self.target = ''.join(
            self.split_sum(quarter) + quarter for quarter in quarters)

    def handleSum(self):
        """Prefix the MD5 digest with the sum of all its hex digits."""
        digest = md5_text(self.target)
        self.target = self.split_sum(digest) + digest

    def date(self, scheme):
        """Append date fields of the timestamp to the MD5 digest.

        *scheme* is a sequence of the characters ``y`` (4-digit year),
        ``m`` (2-digit month) and ``d`` (2-digit day); the matching fields
        are appended in that order. The timestamp is interpreted with
        ``time.localtime``, i.e. in the local time zone of this machine.
        """
        self.target = md5_text(self.target)
        d = time.localtime(self.timestamp)
        strings = {
            'y': compat_str(d.tm_year),
            'm': '%02d' % d.tm_mon,
            'd': '%02d' % d.tm_mday,
        }
        self.target += ''.join(strings[c] for c in scheme)

    def split_time_even_odd(self):
        """Set target to ``odd + md5(target) + even`` (see ``even_odd``)."""
        even, odd = self.even_odd()
        self.target = odd + md5_text(self.target) + even

    def split_time_odd_even(self):
        """Set target to ``even + md5(target) + odd`` (see ``even_odd``)."""
        even, odd = self.even_odd()
        self.target = even + md5_text(self.target) + odd

    def split_ip_time_sum(self):
        """Set target to IP component sum + MD5 digest + timestamp digit sum."""
        chunks, ip = self.preprocess(32)
        self.target = compat_str(sum(ip)) + chunks[0] + self.digit_sum(self.timestamp)

    def split_time_ip_sum(self):
        """Set target to timestamp digit sum + MD5 digest + IP component sum."""
        chunks, ip = self.preprocess(32)
        self.target = self.digit_sum(self.timestamp) + chunks[0] + compat_str(sum(ip))
121 | ||
122 | ||
class IqiyiSDKInterpreter(object):
    """Replays the transformation chain described by a packed SDK script.

    The script is scanned for ``input=<name>(input`` calls and each named
    step is applied, in order, to an ``IqiyiSDK`` instance.
    """

    def __init__(self, sdk_code):
        # Packed SDK source, kept unmodified so that run() can be repeated
        self.sdk_code = sdk_code

    def run(self, target, ip, timestamp):
        """Apply the steps listed in the SDK code and return the result.

        *target*, *ip* and *timestamp* are passed straight to ``IqiyiSDK``.
        Returns the final value of the SDK's ``target`` string.
        Raises ExtractorError when the script names an unsupported step.
        """
        # Decode into a local rather than overwriting self.sdk_code, so a
        # second run() still starts from the packed source.
        decoded_code = decode_packed_codes(self.sdk_code)

        functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', decoded_code)

        sdk = IqiyiSDK(target, ip, timestamp)

        # Steps that take no argument, keyed by their name in the script
        other_functions = {
            'handleSum': sdk.handleSum,
            'handleInput8': sdk.handle_input8,
            'handleInput16': sdk.handle_input16,
            'splitTimeEvenOdd': sdk.split_time_even_odd,
            'splitTimeOddEven': sdk.split_time_odd_even,
            'splitIpTimeSum': sdk.split_ip_time_sum,
            'splitTimeIpSum': sdk.split_time_ip_sum,
        }
        for function in functions:
            # Parameterised steps carry their argument as a name suffix
            if re.match(r'mod\d+', function):
                sdk.mod(int(function[3:]))
            elif re.match(r'date[ymd]{3}', function):
                sdk.date(function[4:])
            elif re.match(r'split\d+', function):
                sdk.split(int(function[5:]))
            elif function in other_functions:
                other_functions[function]()
            else:
                raise ExtractorError('Unknown function %s' % function)

        return sdk.target
156 | ||
157 | ||
605ec701 P |
class IqiyiIE(InfoExtractor):
    """Extractor for single videos and album playlists on iqiyi.com / www.pps.tv."""
    IE_NAME = 'iqiyi'
    IE_DESC = '爱奇艺'

    _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'

    _NETRC_MACHINE = 'iqiyi'

    _TESTS = [{
        'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
        # MD5 checksum differs on my machine and Travis CI
        'info_dict': {
            'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
            'ext': 'mp4',
            'title': '美国德州空中惊现奇异云团 酷似UFO',
        }
    }, {
        'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
        'md5': 'b7dc800a4004b1b57749d9abae0472da',
        'info_dict': {
            'id': 'e3f585b550a280af23c98b6cb2be19fb',
            'ext': 'mp4',
            # This can be either Simplified Chinese or Traditional Chinese
            'title': r're:^(?:名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇|名偵探柯南 國語版:第752集 迫近灰原秘密的黑影 下篇)$',
        },
        'skip': 'Geo-restricted to China',
    }, {
        'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
        'only_matching': True,
    }, {
        'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html',
        'only_matching': True,
    }, {
        'url': 'http://yule.iqiyi.com/pcb.html',
        'info_dict': {
            'id': '4a0af228fddb55ec96398a364248ed7f',
            'ext': 'mp4',
            'title': '第2017-04-21期 女艺人频遭极端粉丝骚扰',
        },
    }, {
        # VIP-only video. The first 2 parts (6 minutes) are available without login
        # MD5 sums omitted as values are different on Travis CI and my machine
        'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
        'info_dict': {
            'id': 'f3cf468b39dddb30d676f89a91200dc1',
            'ext': 'mp4',
            'title': '泰坦尼克号',
        },
        'skip': 'Geo-restricted to China',
    }, {
        'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
        'info_dict': {
            'id': '202918101',
            'title': '灌篮高手 国语版',
        },
        'playlist_count': 101,
    }, {
        'url': 'http://www.pps.tv/w_19rrbav0ph.html',
        'only_matching': True,
    }]

    # Maps the stream's 'vd' value (as a string) to a relative quality rank
    _FORMATS_MAP = {
        '96': 1,    # 216p, 240p
        '1': 2,     # 336p, 360p
        '2': 3,     # 480p, 504p
        '21': 4,    # 504p
        '4': 5,     # 720p
        '17': 5,    # 720p
        '5': 6,     # 1072p, 1080p
        '18': 7,    # 1080p
    }

    def _real_initialize(self):
        """Attempt to log in before any extraction takes place."""
        self._login()

    @staticmethod
    def _rsa_fun(data):
        """Encrypt *data* with the site's hard-coded RSA public key."""
        # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
        N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
        e = 65537

        return ohdave_rsa_encrypt(data, e, N)

    def _login(self):
        """Log in with the configured credentials, if any.

        Returns True when no credentials are configured or when the
        server answers with code 'A00000'; otherwise reports a warning
        and returns False.
        """
        username, password = self._get_login_info()

        # No authentication to be performed
        if not username:
            return True

        data = self._download_json(
            'http://kylin.iqiyi.com/get_token', None,
            note='Get token for logging', errnote='Unable to get token for logging')
        sdk = data['sdk']
        timestamp = int(time.time())
        target = '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % (
            username, self._rsa_fun(password.encode('utf-8')))

        # The signature is derived from the login target by replaying the
        # transformation steps listed in the SDK code sent by the server
        interp = IqiyiSDKInterpreter(sdk)
        sign = interp.run(target, data['ip'], timestamp)

        validation_params = {
            'target': target,
            'server': 'BEA3AA1908656AABCCFF76582C4C6660',
            'token': data['token'],
            'bird_src': 'f8d91d57af224da7893dd397d52d811a',
            'sign': sign,
            'bird_t': timestamp,
        }
        validation_result = self._download_json(
            'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params), None,
            note='Validate credentials', errnote='Unable to validate credentials')

        MSG_MAP = {
            'P00107': 'please login via the web interface and enter the CAPTCHA code',
            'P00117': 'bad username or password',
        }

        code = validation_result['code']
        if code != 'A00000':
            msg = MSG_MAP.get(code)
            if not msg:
                msg = 'error %s' % code
                if validation_result.get('msg'):
                    msg += ': ' + validation_result['msg']
            self.report_warning('unable to log in: ' + msg)
            return False

        return True

    def get_raw_data(self, tvid, video_id):
        """Download and return the parsed stream metadata JSON for a video.

        The request is signed with an MD5 of the millisecond timestamp, a
        fixed key and *tvid*.
        """
        tm = int(time.time() * 1000)

        key = 'd5fb4bd9d50c4be6948c97edd7254b0e'
        sc = md5_text(compat_str(tm) + key + tvid)
        params = {
            'tvid': tvid,
            'vid': video_id,
            'src': '76f90cbd92f94a2e925d83e8ccd22cb7',
            'sc': sc,
            't': tm,
        }

        # The response is JSON prefixed with a JavaScript assignment
        return self._download_json(
            'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id),
            video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='),
            query=params, headers=self.geo_verification_headers())

    def _extract_playlist(self, webpage):
        """Build a playlist result from an album page.

        Returns None when *webpage* contains no album video links.
        """
        PAGE_SIZE = 50

        links = re.findall(
            r'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"',
            webpage)
        if not links:
            return

        album_id = self._search_regex(
            r'albumId\s*:\s*(\d+),', webpage, 'album ID')
        album_title = self._search_regex(
            r'data-share-title="([^"]+)"', webpage, 'album title', fatal=False)

        entries = list(map(self.url_result, links))

        # Start from 2 because links in the first page are already on webpage
        for page_num in itertools.count(2):
            pagelist_page = self._download_webpage(
                'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id, page_num, PAGE_SIZE),
                album_id,
                note='Download playlist page %d' % page_num,
                errnote='Failed to download playlist page %d' % page_num)
            pagelist = self._parse_json(
                remove_start(pagelist_page, 'var tvInfoJs='), album_id)
            vlist = pagelist['data']['vlist']
            for item in vlist:
                entries.append(self.url_result(item['vurl']))
            # A short page means there are no further pages
            if len(vlist) < PAGE_SIZE:
                break

        return self.playlist_result(entries, album_id, album_title)

    def _real_extract(self, url):
        """Extract a single video, or a playlist if the page has no video.

        Raises ExtractorError when neither a video nor a playlist is found
        or when the metadata endpoint reports an error code.
        """
        MAX_ATTEMPTS = 5

        webpage = self._download_webpage(
            url, 'temp_id', note='download video page')

        # There's no simple way to determine whether an URL is a playlist or not
        # Sometimes there are playlist links in individual videos, so treat it
        # as a single video first
        tvid = self._search_regex(
            r'data-(?:player|shareplattrigger)-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid', default=None)
        if tvid is None:
            playlist_result = self._extract_playlist(webpage)
            if playlist_result:
                return playlist_result
            raise ExtractorError('Can\'t find any video')

        video_id = self._search_regex(
            r'data-(?:player|shareplattrigger)-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')

        formats = []
        for attempt in range(MAX_ATTEMPTS):
            raw_data = self.get_raw_data(tvid, video_id)

            if raw_data['code'] != 'A00000':
                if raw_data['code'] == 'A00111':
                    self.raise_geo_restricted()
                raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])

            data = raw_data['data']

            for stream in data['vidl']:
                if 'm3utx' not in stream:
                    continue
                vd = compat_str(stream['vd'])
                formats.append({
                    'url': stream['m3utx'],
                    'format_id': vd,
                    'ext': 'mp4',
                    'quality': self._FORMATS_MAP.get(vd, -1),
                    'protocol': 'm3u8_native',
                })

            if formats:
                break

            # Wait before retrying, but not after the final attempt: there
            # is nothing left to retry, so the delay would be wasted.
            if attempt < MAX_ATTEMPTS - 1:
                self._sleep(5, video_id)

        self._sort_formats(formats)
        title = (get_element_by_id('widget-videotitle', webpage)
                 or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage))
                 or self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }