]>
Commit | Line | Data |
---|---|---|
605ec701 | 1 | # coding: utf-8 |
605ec701 P |
2 | from __future__ import unicode_literals |
3 | ||
5b6ad863 | 4 | import binascii |
958d0b65 | 5 | import hashlib |
73f9c286 | 6 | import itertools |
958d0b65 | 7 | import math |
99709cc3 | 8 | import re |
605ec701 | 9 | import time |
958d0b65 YCH |
10 | |
11 | from .common import InfoExtractor | |
8e0548e1 | 12 | from ..compat import ( |
99709cc3 | 13 | compat_str, |
15707c7e | 14 | compat_urllib_parse_urlencode, |
8e0548e1 YCH |
15 | ) |
16 | from ..utils import ( | |
f52354a8 | 17 | decode_packed_codes, |
8e0548e1 | 18 | ExtractorError, |
5b6ad863 | 19 | intlist_to_bytes, |
99709cc3 | 20 | ohdave_rsa_encrypt, |
73f9c286 | 21 | remove_start, |
5b6ad863 | 22 | urshift, |
8e0548e1 | 23 | ) |
605ec701 | 24 | |
f1da8610 | 25 | |
99709cc3 YCH |
def md5_text(text):
    """Return the hexadecimal MD5 digest of *text* encoded as UTF-8."""
    encoded = text.encode('utf-8')
    digest = hashlib.md5(encoded)
    return digest.hexdigest()
28 | ||
29 | ||
class IqiyiSDK(object):
    """String-mangling primitives that IqiyiSDKInterpreter dispatches to.

    Each operation rewrites ``self.target`` in place, combining the MD5 hex
    digest of the current target with values derived from ``self.ip`` and
    ``self.timestamp``.
    """

    def __init__(self, target, ip, timestamp):
        # ip is split on '.' and parsed as integers by preprocess();
        # timestamp is handed to time.localtime() by date().
        self.timestamp = timestamp
        self.ip = ip
        self.target = target

    @staticmethod
    def split_sum(data):
        """Add up the hexadecimal digits of *data*; return the total as text."""
        total = sum(int(digit, 16) for digit in data)
        return compat_str(total)

    @staticmethod
    def digit_sum(num):
        """Add up the decimal digits of *num* (an int or a string of digits)."""
        digits = compat_str(num) if isinstance(num, int) else num
        return compat_str(sum(int(digit) for digit in digits))

    def even_odd(self):
        """Return digit sums of the timestamp's even- and odd-indexed digits."""
        stamp = compat_str(self.timestamp)
        return self.digit_sum(stamp[::2]), self.digit_sum(stamp[1::2])

    def preprocess(self, chunksize):
        """Hash the target and cut the 32-char digest into *chunksize* pieces.

        Returns a ``(chunks, ip)`` pair where ``ip`` is the list of integer
        components of ``self.ip``.  A trailing shorter chunk is included when
        *chunksize* does not divide 32.
        """
        digest = md5_text(self.target)
        self.target = digest
        full_count, leftover = divmod(32, chunksize)
        chunks = [
            digest[chunksize * n:chunksize * (n + 1)]
            for n in range(full_count)]
        if leftover:
            chunks.append(digest[32 - leftover:])
        octets = [int(part) for part in self.ip.split('.')]
        return chunks, octets

    def mod(self, modulus):
        """Append each IP component modulo *modulus* to the digest."""
        chunks, octets = self.preprocess(32)
        remainders = [compat_str(octet % modulus) for octet in octets]
        self.target = chunks[0] + ''.join(remainders)

    def split(self, chunksize):
        """Interleave digest chunks with reduced IP components.

        Only the first four chunks receive an IP part.  For a chunk size of 8
        the IP part precedes its chunk; otherwise it follows it.
        """
        chunks, octets = self.preprocess(chunksize)
        moduli = {
            4: 256,
            5: 10,
            8: 100,
        }
        pieces = []
        for index, chunk in enumerate(chunks):
            if index < 4:
                ip_part = compat_str(octets[index] % moduli[chunksize])
            else:
                ip_part = ''
            if chunksize == 8:
                pieces.append(ip_part + chunk)
            else:
                pieces.append(chunk + ip_part)
        self.target = ''.join(pieces)

    def handle_input16(self):
        """Wrap the digest with the hex-digit sums of its two halves."""
        digest = md5_text(self.target)
        head_sum = self.split_sum(digest[:16])
        tail_sum = self.split_sum(digest[16:])
        self.target = head_sum + digest + tail_sum

    def handle_input8(self):
        """Prefix each 8-char quarter of the digest with its hex-digit sum."""
        digest = md5_text(self.target)
        quarters = [digest[offset:offset + 8] for offset in range(0, 32, 8)]
        self.target = ''.join(
            self.split_sum(quarter) + quarter for quarter in quarters)

    def handleSum(self):
        """Prefix the digest with the sum of all of its hex digits."""
        digest = md5_text(self.target)
        self.target = self.split_sum(digest) + digest

    def date(self, scheme):
        """Append local-time date fields to the digest.

        *scheme* is a sequence of the letters ``y``, ``m`` and ``d`` selecting
        which fields to append and in which order.
        """
        self.target = md5_text(self.target)
        local = time.localtime(self.timestamp)
        fields = {
            'y': compat_str(local.tm_year),
            'm': '%02d' % local.tm_mon,
            'd': '%02d' % local.tm_mday,
        }
        self.target += ''.join(fields[letter] for letter in scheme)

    def split_time_even_odd(self):
        """Surround the digest with the timestamp sums: odd first, even last."""
        even, odd = self.even_odd()
        self.target = ''.join((odd, md5_text(self.target), even))

    def split_time_odd_even(self):
        """Surround the digest with the timestamp sums: even first, odd last."""
        even, odd = self.even_odd()
        self.target = ''.join((even, md5_text(self.target), odd))

    def split_ip_time_sum(self):
        """Build ``<sum of IP parts><digest><timestamp digit sum>``."""
        chunks, octets = self.preprocess(32)
        ip_total = compat_str(sum(octets))
        self.target = ip_total + chunks[0] + self.digit_sum(self.timestamp)

    def split_time_ip_sum(self):
        """Build ``<timestamp digit sum><digest><sum of IP parts>``."""
        chunks, octets = self.preprocess(32)
        time_total = self.digit_sum(self.timestamp)
        self.target = time_total + chunks[0] + compat_str(sum(octets))
122 | ||
123 | ||
class IqiyiSDKInterpreter(object):
    """Replay the transformation chain named in a packed SDK script.

    The script is decoded with ``decode_packed_codes`` and scanned for
    ``input=<name>(input`` calls; each name is mapped onto the matching
    ``IqiyiSDK`` operation and applied in order.
    """

    def __init__(self, sdk_code):
        # Packed script text; kept untouched so run() can be called again.
        self.sdk_code = sdk_code

    def run(self, target, ip, timestamp):
        """Apply the script's operations to *target* and return the result.

        Raises ExtractorError when the script names an operation that is not
        recognised.
        """
        # Decode into a local: overwriting self.sdk_code would make a second
        # run() attempt to decode text that is already decoded.
        sdk_code = decode_packed_codes(self.sdk_code)

        functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', sdk_code)

        sdk = IqiyiSDK(target, ip, timestamp)

        other_functions = {
            'handleSum': sdk.handleSum,
            'handleInput8': sdk.handle_input8,
            'handleInput16': sdk.handle_input16,
            'splitTimeEvenOdd': sdk.split_time_even_odd,
            'splitTimeOddEven': sdk.split_time_odd_even,
            'splitIpTimeSum': sdk.split_ip_time_sum,
            'splitTimeIpSum': sdk.split_time_ip_sum,
        }
        for function in functions:
            # Patterns are anchored at the end so that a name with trailing
            # junk is reported as unknown instead of failing inside int() or
            # the date field lookup.
            if re.match(r'mod\d+$', function):
                sdk.mod(int(function[3:]))
            elif re.match(r'date[ymd]{3}$', function):
                sdk.date(function[4:])
            elif re.match(r'split\d+$', function):
                sdk.split(int(function[5:]))
            elif function in other_functions:
                other_functions[function]()
            else:
                raise ExtractorError('Unknown function %s' % function)

        return sdk.target
157 | ||
158 | ||
605ec701 P |
class IqiyiIE(InfoExtractor):
    """Extractor for iqiyi.com / pps.tv video and album pages."""

    IE_NAME = 'iqiyi'
    IE_DESC = '爱奇艺'

    _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'

    _NETRC_MACHINE = 'iqiyi'

    _TESTS = [{
        'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
        'md5': '470a6c160618577166db1a7aac5a3606',
        'info_dict': {
            'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
            'ext': 'mp4',
            'title': '美国德州空中惊现奇异云团 酷似UFO',
        }
    }, {
        'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
        'md5': 'f09f0a6a59b2da66a26bf4eda669a4cc',
        'info_dict': {
            'id': 'e3f585b550a280af23c98b6cb2be19fb',
            'ext': 'mp4',
            'title': '名侦探柯南 国语版',
        },
        'skip': 'Geo-restricted to China',
    }, {
        'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
        'only_matching': True,
    }, {
        'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html',
        'only_matching': True,
    }, {
        'url': 'http://yule.iqiyi.com/pcb.html',
        'only_matching': True,
    }, {
        # VIP-only video. The first 2 parts (6 minutes) are available without login
        # MD5 sums omitted as values are different on Travis CI and my machine
        'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
        'info_dict': {
            'id': 'f3cf468b39dddb30d676f89a91200dc1',
            'title': '泰坦尼克号',
        },
        'playlist': [{
            'info_dict': {
                'id': 'f3cf468b39dddb30d676f89a91200dc1_part1',
                'ext': 'f4v',
                'title': '泰坦尼克号',
            },
        }, {
            'info_dict': {
                'id': 'f3cf468b39dddb30d676f89a91200dc1_part2',
                'ext': 'f4v',
                'title': '泰坦尼克号',
            },
        }],
        'expected_warnings': ['Needs a VIP account for full video'],
    }, {
        'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
        'info_dict': {
            'id': '202918101',
            'title': '灌篮高手 国语版',
        },
        'playlist_count': 101,
    }, {
        'url': 'http://www.pps.tv/w_19rrbav0ph.html',
        'only_matching': True,
    }]

    _FORMATS_MAP = [
        ('1', 'h6'),
        ('2', 'h5'),
        ('3', 'h4'),
        ('4', 'h3'),
        ('5', 'h2'),
        ('10', 'h1'),
    ]

    def _real_initialize(self):
        self._login()

    @staticmethod
    def _rsa_fun(data):
        """Encrypt *data* with the site's public RSA key via ohdave_rsa_encrypt."""
        # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
        N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
        e = 65537

        return ohdave_rsa_encrypt(data, e, N)

    def _login(self):
        """Log in with the configured credentials, if any.

        Returns True when no username is configured or the server answers
        with code ``A00000``; otherwise reports a warning and returns False.
        """
        (username, password) = self._get_login_info()

        # No authentication to be performed
        if not username:
            return True

        data = self._download_json(
            'http://kylin.iqiyi.com/get_token', None,
            note='Get token for logging', errnote='Unable to get token for logging')
        sdk = data['sdk']
        timestamp = int(time.time())
        target = '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % (
            username, self._rsa_fun(password.encode('utf-8')))

        # The server-supplied script decides how the request is signed.
        interp = IqiyiSDKInterpreter(sdk)
        sign = interp.run(target, data['ip'], timestamp)

        validation_params = {
            'target': target,
            'server': 'BEA3AA1908656AABCCFF76582C4C6660',
            'token': data['token'],
            'bird_src': 'f8d91d57af224da7893dd397d52d811a',
            'sign': sign,
            'bird_t': timestamp,
        }
        validation_result = self._download_json(
            'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params), None,
            note='Validate credentials', errnote='Unable to validate credentials')

        MSG_MAP = {
            'P00107': 'please login via the web interface and enter the CAPTCHA code',
            'P00117': 'bad username or password',
        }

        code = validation_result['code']
        if code != 'A00000':
            msg = MSG_MAP.get(code)
            if not msg:
                msg = 'error %s' % code
                if validation_result.get('msg'):
                    msg += ': ' + validation_result['msg']
            self._downloader.report_warning('unable to log in: ' + msg)
            return False

        return True

    @staticmethod
    def _gen_sc(tvid, timestamp):
        """Compute the ``sc`` request signature for *tvid* at *timestamp*.

        Returns a 32-character hexadecimal string.  The routine packs
        ``timestamp - 7``, two fixed byte tables and up to nine characters of
        *tvid* into a word buffer and runs 64 mixing rounds over it.
        NOTE(review): the constants and round structure look like an
        MD5-style compression function on signed 32-bit words - confirm
        before relying on that description.
        """
        M = [1732584193, -271733879]
        M.extend([~M[0], ~M[1]])
        I_table = [7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21]
        C_base = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8388608, 432]

        def L(n, t):
            # Addition wrapped to a signed 32-bit result; None counts as 0.
            if t is None:
                t = 0
            return trunc(((n >> 1) + (t >> 1) << 1) + (n & 1) + (t & 1))

        def trunc(n):
            # Reduce n to the signed 32-bit range.
            n = n % 0x100000000
            if n > 0x7fffffff:
                n -= 0x100000000
            return n

        def transform(string, mod):
            # Reads ``i`` and ``a`` from the enclosing loops at call time.
            num = int(string, 16)
            return (num >> 8 * (i % 4) & 255 ^ i % mod) << ((a & 3) << 3)

        C = list(C_base)
        o = list(M)
        k = str(timestamp - 7)
        # Bytes 0-12: the first 13 characters of the adjusted timestamp.
        for i in range(13):
            a = i
            C[a >> 2] |= ord(k[a]) << 8 * (a % 4)

        # Bytes 13-28: first fixed table, each 8-digit group reversed.
        for i in range(16):
            a = i + 13
            start = (i >> 2) * 8
            r = '03967743b643f66763d623d637e30733'
            C[a >> 2] |= transform(''.join(reversed(r[start:start + 8])), 7)

        # Bytes 29-44: second fixed table.
        for i in range(16):
            a = i + 29
            start = (i >> 2) * 8
            r = '7038766939776a32776a32706b337139'
            C[a >> 2] |= transform(r[start:start + 8], 1)

        # Bytes 45-53: up to nine characters of the tvid.
        for i in range(9):
            a = i + 45
            if i < len(tvid):
                C[a >> 2] |= ord(tvid[i]) << 8 * (a % 4)

        for a in range(64):
            i = a
            I = i >> 4
            C_index = [i, 5 * i + 1, 3 * i + 5, 7 * i][I] % 16 + urshift(a, 6)
            m = L(L(o[0], [
                trunc(o[1] & o[2]) | trunc(~o[1] & o[3]),
                trunc(o[3] & o[1]) | trunc(~o[3] & o[2]),
                o[1] ^ o[2] ^ o[3],
                o[2] ^ trunc(o[1] | ~o[3])
            ][I]), L(
                trunc(int(abs(math.sin(i + 1)) * 4294967296)),
                C[C_index] if C_index < len(C) else None))
            I = I_table[4 * I + i % 4]
            o = [o[3],
                 L(o[1], trunc(trunc(m << I) | urshift(m, 32 - I))),
                 o[1],
                 o[2]]

        new_M = [L(o[0], M[0]), L(o[1], M[1]), L(o[2], M[2]), L(o[3], M[3])]
        # One nibble per output character; hexlify yields two hex digits per
        # nibble-valued byte, so every second digit is kept.
        s = [new_M[a >> 3] >> (1 ^ a & 7) * 4 & 15 for a in range(32)]
        return binascii.hexlify(intlist_to_bytes(s))[1::2].decode('ascii')

    def get_raw_data(self, tvid, video_id):
        """Fetch and parse the ``tmts`` JSON document for a video.

        The response is prefixed with ``var tvInfoJs=``, which is stripped
        before parsing.  When the ``cn_verification_proxy`` option is set the
        request is routed through it.
        """
        tm = int(time.time() * 1000)

        sc = self._gen_sc(tvid, tm)
        params = {
            'platForm': 'h5',
            'rate': 1,
            'tvid': tvid,
            'vid': video_id,
            'cupid': 'qc_100001_100186',
            'type': 'mp4',
            'nolimit': 0,
            'agenttype': 13,
            'src': 'd846d0c32d664d32b6b54ea48997a589',
            'sc': sc,
            # _gen_sc signs ``timestamp - 7``, so the same value is sent here.
            't': tm - 7,
            '__jsT': None,
        }

        headers = {}
        cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
        if cn_verification_proxy:
            headers['Ytdl-request-proxy'] = cn_verification_proxy
        return self._download_json(
            'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id),
            video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='),
            query=params, headers=headers)

    def _extract_playlist(self, webpage):
        """Return a playlist result for an album page, or None otherwise.

        A page counts as an album when it contains ``site-piclist_pic_link``
        anchors.  Further pages are fetched until one holds fewer than
        PAGE_SIZE entries.
        """
        PAGE_SIZE = 50

        links = re.findall(
            r'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"',
            webpage)
        if not links:
            return

        album_id = self._search_regex(
            r'albumId\s*:\s*(\d+),', webpage, 'album ID')
        album_title = self._search_regex(
            r'data-share-title="([^"]+)"', webpage, 'album title', fatal=False)

        entries = list(map(self.url_result, links))

        # Start from 2 because links in the first page are already on webpage
        for page_num in itertools.count(2):
            pagelist_page = self._download_webpage(
                'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id, page_num, PAGE_SIZE),
                album_id,
                note='Download playlist page %d' % page_num,
                errnote='Failed to download playlist page %d' % page_num)
            pagelist = self._parse_json(
                remove_start(pagelist_page, 'var tvInfoJs='), album_id)
            vlist = pagelist['data']['vlist']
            for item in vlist:
                entries.append(self.url_result(item['vurl']))
            if len(vlist) < PAGE_SIZE:
                break

        return self.playlist_result(entries, album_id, album_title)

    def _real_extract(self, url):
        """Extract a single video, or a playlist when *url* is an album page.

        Raises ExtractorError when the API reports an error code or when
        every retry returns an advertisement instead of the video.
        """
        webpage = self._download_webpage(
            url, 'temp_id', note='download video page')

        # There's no simple way to determine whether an URL is a playlist or not
        # So detect it
        playlist_result = self._extract_playlist(webpage)
        if playlist_result:
            return playlist_result

        tvid = self._search_regex(
            r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
        video_id = self._search_regex(
            r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')

        for _ in range(5):
            raw_data = self.get_raw_data(tvid, video_id)

            if raw_data['code'] != 'A00000':
                if raw_data['code'] == 'A00111':
                    self.raise_geo_restricted()
                raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])

            data = raw_data['data']

            # iQiYi sometimes returns Ads
            if not isinstance(data['playInfo'], dict):
                self._sleep(5, video_id)
                continue

            title = data['playInfo']['an']
            break
        else:
            # Every attempt returned an ad; without this, ``title`` would be
            # unbound below and surface as an UnboundLocalError.
            raise ExtractorError(
                'Unable to extract video info: only advertisements were returned')

        return {
            'id': video_id,
            'title': title,
            'url': data['m3u'],
        }