]>
Commit | Line | Data |
---|---|---|
605ec701 | 1 | # coding: utf-8 |
605ec701 P |
2 | from __future__ import unicode_literals |
3 | ||
5b6ad863 | 4 | import binascii |
958d0b65 | 5 | import hashlib |
73f9c286 | 6 | import itertools |
958d0b65 | 7 | import math |
99709cc3 | 8 | import re |
605ec701 | 9 | import time |
958d0b65 YCH |
10 | |
11 | from .common import InfoExtractor | |
8e0548e1 | 12 | from ..compat import ( |
99709cc3 | 13 | compat_str, |
15707c7e | 14 | compat_urllib_parse_urlencode, |
8e0548e1 YCH |
15 | ) |
16 | from ..utils import ( | |
f52354a8 | 17 | decode_packed_codes, |
8e0548e1 | 18 | ExtractorError, |
5b6ad863 | 19 | intlist_to_bytes, |
99709cc3 | 20 | ohdave_rsa_encrypt, |
73f9c286 | 21 | remove_start, |
5b6ad863 | 22 | urshift, |
8e0548e1 | 23 | ) |
605ec701 | 24 | |
f1da8610 | 25 | |
99709cc3 YCH |
def md5_text(text):
    """Return the hexadecimal MD5 digest of ``text`` encoded as UTF-8."""
    encoded = text.encode('utf-8')
    digest = hashlib.md5(encoded)
    return digest.hexdigest()
28 | ||
29 | ||
class IqiyiSDK(object):
    """String transforms applied to a signing target.

    Every public transform rewrites ``self.target`` in place, usually by
    MD5-hashing it first and then mixing in pieces of the client IP and/or
    the timestamp. IqiyiSDKInterpreter chooses which transforms to run and
    in which order.
    """

    def __init__(self, target, ip, timestamp):
        # target: the string being signed; replaced by each transform.
        # ip: dotted string, split on '.' and parsed as ints in preprocess().
        # timestamp: number whose decimal digits are summed by some transforms.
        self.target = target
        self.ip = ip
        self.timestamp = timestamp

    @staticmethod
    def split_sum(data):
        """Return, as a string, the sum of the hexadecimal digits of ``data``."""
        return compat_str(sum(int(digit, 16) for digit in data))

    @staticmethod
    def digit_sum(num):
        """Return, as a string, the sum of the decimal digits of ``num``.

        ``num`` may be an int or an already-stringified run of digits.
        """
        digits = compat_str(num) if isinstance(num, int) else num
        return compat_str(sum(int(digit) for digit in digits))

    def even_odd(self):
        """Return digit sums of the even- and odd-indexed timestamp digits."""
        stamp = compat_str(self.timestamp)
        return self.digit_sum(stamp[::2]), self.digit_sum(stamp[1::2])

    def preprocess(self, chunksize):
        """Hash the target and cut the 32-char digest into chunks.

        Returns ``(chunks, ip_numbers)`` where ``chunks`` are consecutive
        ``chunksize``-long slices of the digest (plus a shorter trailing slice
        when 32 is not a multiple of ``chunksize``) and ``ip_numbers`` is the
        IP split on '.' and converted to ints.
        """
        self.target = md5_text(self.target)
        whole, leftover = divmod(32, chunksize)
        chunks = [
            self.target[chunksize * n:chunksize * (n + 1)]
            for n in range(whole)
        ]
        if leftover:
            chunks.append(self.target[32 - leftover:])
        ip_numbers = [int(part) for part in self.ip.split('.')]
        return chunks, ip_numbers

    def mod(self, modulus):
        """Set target to its digest followed by each IP number mod ``modulus``."""
        chunks, ip_numbers = self.preprocess(32)
        suffix = ''.join(compat_str(number % modulus) for number in ip_numbers)
        self.target = chunks[0] + suffix

    def split(self, chunksize):
        """Interleave digest chunks with reduced IP numbers.

        The first four chunks are each paired with one IP number reduced by a
        modulus that depends on ``chunksize``; for ``chunksize == 8`` the IP
        part precedes the chunk, otherwise it follows it.
        """
        modulus_map = {
            4: 256,
            5: 10,
            8: 100,
        }

        chunks, ip_numbers = self.preprocess(chunksize)
        pieces = []
        for index, chunk in enumerate(chunks):
            if index < 4:
                ip_part = compat_str(ip_numbers[index] % modulus_map[chunksize])
            else:
                ip_part = ''
            pieces.append(ip_part + chunk if chunksize == 8 else chunk + ip_part)
        self.target = ''.join(pieces)

    def handle_input16(self):
        """Wrap the digest with the hex-digit sums of its two 16-char halves."""
        digest = md5_text(self.target)
        head_sum = self.split_sum(digest[:16])
        tail_sum = self.split_sum(digest[16:])
        self.target = head_sum + digest + tail_sum

    def handle_input8(self):
        """Prefix each 8-char quarter of the digest with its hex-digit sum."""
        digest = md5_text(self.target)
        quarters = [digest[8 * n:8 * (n + 1)] for n in range(4)]
        self.target = ''.join(self.split_sum(part) + part for part in quarters)

    def handleSum(self):
        """Prefix the digest with the sum of all of its hex digits."""
        digest = md5_text(self.target)
        self.target = self.split_sum(digest) + digest

    def date(self, scheme):
        """Append date fields to the digest in the order given by ``scheme``.

        ``scheme`` is a sequence of the letters 'y', 'm' and 'd'; the date is
        the local-time interpretation of the timestamp.
        """
        # Hash first so the target is already replaced if a lookup below fails.
        self.target = md5_text(self.target)
        local = time.localtime(self.timestamp)
        fields = {
            'y': compat_str(local.tm_year),
            'm': '%02d' % local.tm_mon,
            'd': '%02d' % local.tm_mday,
        }
        self.target += ''.join(fields[letter] for letter in scheme)

    def split_time_even_odd(self):
        """Set target to odd-sum + digest + even-sum of the timestamp digits."""
        even, odd = self.even_odd()
        self.target = ''.join((odd, md5_text(self.target), even))

    def split_time_odd_even(self):
        """Set target to even-sum + digest + odd-sum of the timestamp digits."""
        even, odd = self.even_odd()
        self.target = ''.join((even, md5_text(self.target), odd))

    def split_ip_time_sum(self):
        """Set target to IP-number sum + digest + timestamp digit sum."""
        chunks, ip_numbers = self.preprocess(32)
        ip_total = compat_str(sum(ip_numbers))
        self.target = ip_total + chunks[0] + self.digit_sum(self.timestamp)

    def split_time_ip_sum(self):
        """Set target to timestamp digit sum + digest + IP-number sum."""
        chunks, ip_numbers = self.preprocess(32)
        time_total = self.digit_sum(self.timestamp)
        self.target = time_total + chunks[0] + compat_str(sum(ip_numbers))
122 | ||
123 | ||
class IqiyiSDKInterpreter(object):
    """Replays the transform chain described by a packed SDK script.

    The script is decoded with ``decode_packed_codes`` and scanned for calls
    of the form ``input=<name>(input``; each name is mapped to the matching
    IqiyiSDK transform and applied in order of appearance.
    """

    def __init__(self, sdk_code):
        # Packed script exactly as received; never modified by run().
        self.sdk_code = sdk_code

    def run(self, target, ip, timestamp):
        """Apply the script's transforms to ``target`` and return the result.

        Raises ExtractorError when the script names a transform that has no
        Python counterpart.
        """
        # Decode into a local rather than overwriting self.sdk_code, so a
        # second run() decodes the original packed input again instead of
        # feeding already-decoded text back into the decoder.
        sdk_code = decode_packed_codes(self.sdk_code)

        functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', sdk_code)

        sdk = IqiyiSDK(target, ip, timestamp)

        # Transforms that take no argument, keyed by their name in the script.
        other_functions = {
            'handleSum': sdk.handleSum,
            'handleInput8': sdk.handle_input8,
            'handleInput16': sdk.handle_input16,
            'splitTimeEvenOdd': sdk.split_time_even_odd,
            'splitTimeOddEven': sdk.split_time_odd_even,
            'splitIpTimeSum': sdk.split_ip_time_sum,
            'splitTimeIpSum': sdk.split_time_ip_sum,
        }
        for function in functions:
            # Parameterised transforms carry their argument in the name,
            # e.g. "mod<digits>", "date<three of y/m/d>" or "split<digits>".
            if re.match(r'mod\d+', function):
                sdk.mod(int(function[3:]))
            elif re.match(r'date[ymd]{3}', function):
                sdk.date(function[4:])
            elif re.match(r'split\d+', function):
                sdk.split(int(function[5:]))
            elif function in other_functions:
                other_functions[function]()
            else:
                raise ExtractorError('Unknown function %s' % function)

        return sdk.target
157 | ||
158 | ||
605ec701 P |
class IqiyiIE(InfoExtractor):
    """Extractor for pages matching ``_VALID_URL`` (iqiyi.com and www.pps.tv).

    A page is first checked for an album listing (returned as a playlist);
    otherwise the single video's metadata is requested from the tmts endpoint
    using a signature produced by ``_gen_sc``.
    """

    IE_NAME = 'iqiyi'
    IE_DESC = '爱奇艺'

    _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'

    _NETRC_MACHINE = 'iqiyi'

    _TESTS = [{
        'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
        'md5': '470a6c160618577166db1a7aac5a3606',
        'info_dict': {
            'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
            'ext': 'mp4',
            'title': '美国德州空中惊现奇异云团 酷似UFO',
        }
    }, {
        'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
        'md5': 'f09f0a6a59b2da66a26bf4eda669a4cc',
        'info_dict': {
            'id': 'e3f585b550a280af23c98b6cb2be19fb',
            'ext': 'mp4',
            'title': '名侦探柯南 国语版',
        },
        'params': {
            'cn_verification_proxy': 'http://proxy.uku.im:443/',
        },
    }, {
        'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
        'only_matching': True,
    }, {
        'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html',
        'only_matching': True,
    }, {
        'url': 'http://yule.iqiyi.com/pcb.html',
        'only_matching': True,
    }, {
        # VIP-only video. The first 2 parts (6 minutes) are available without login
        # MD5 sums omitted as values are different on Travis CI and my machine
        'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
        'info_dict': {
            'id': 'f3cf468b39dddb30d676f89a91200dc1',
            'title': '泰坦尼克号',
        },
        'playlist': [{
            'info_dict': {
                'id': 'f3cf468b39dddb30d676f89a91200dc1_part1',
                'ext': 'f4v',
                'title': '泰坦尼克号',
            },
        }, {
            'info_dict': {
                'id': 'f3cf468b39dddb30d676f89a91200dc1_part2',
                'ext': 'f4v',
                'title': '泰坦尼克号',
            },
        }],
        'expected_warnings': ['Needs a VIP account for full video'],
    }, {
        'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
        'info_dict': {
            'id': '202918101',
            'title': '灌篮高手 国语版',
        },
        'playlist_count': 101,
    }, {
        'url': 'http://www.pps.tv/w_19rrbav0ph.html',
        'only_matching': True,
    }]

    # NOTE(review): not referenced by any method visible in this file.
    _FORMATS_MAP = [
        ('1', 'h6'),
        ('2', 'h5'),
        ('3', 'h4'),
        ('4', 'h3'),
        ('5', 'h2'),
        ('10', 'h1'),
    ]

    def _real_initialize(self):
        """Attempt to log in; the boolean result of ``_login`` is ignored."""
        self._login()

    @staticmethod
    def _rsa_fun(data):
        """Encrypt ``data`` with the site's hard-coded RSA public key."""
        # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
        N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
        e = 65537

        return ohdave_rsa_encrypt(data, e, N)

    def _login(self):
        """Log in with the configured credentials, if any.

        Returns True when no username is configured or validation succeeds,
        and False (after emitting a warning) when the validation endpoint
        answers with any code other than 'A00000'.
        """
        (username, password) = self._get_login_info()

        # No authentication to be performed
        if not username:
            return True

        data = self._download_json(
            'http://kylin.iqiyi.com/get_token', None,
            note='Get token for logging', errnote='Unable to get token for logging')
        sdk = data['sdk']
        timestamp = int(time.time())
        # The password is RSA-encrypted; the username is inserted as-is.
        target = '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % (
            username, self._rsa_fun(password.encode('utf-8')))

        # The signature is derived from the target path, the IP reported by
        # the token response, and the current timestamp.
        interp = IqiyiSDKInterpreter(sdk)
        sign = interp.run(target, data['ip'], timestamp)

        validation_params = {
            'target': target,
            'server': 'BEA3AA1908656AABCCFF76582C4C6660',
            'token': data['token'],
            'bird_src': 'f8d91d57af224da7893dd397d52d811a',
            'sign': sign,
            'bird_t': timestamp,
        }
        validation_result = self._download_json(
            'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params), None,
            note='Validate credentials', errnote='Unable to validate credentials')

        MSG_MAP = {
            'P00107': 'please login via the web interface and enter the CAPTCHA code',
            'P00117': 'bad username or password',
        }

        code = validation_result['code']
        if code != 'A00000':
            msg = MSG_MAP.get(code)
            if not msg:
                # Unknown code: report it, plus the server message if present.
                msg = 'error %s' % code
                if validation_result.get('msg'):
                    msg += ': ' + validation_result['msg']
            self._downloader.report_warning('unable to log in: ' + msg)
            return False

        return True

    @staticmethod
    def _gen_sc(tvid, timestamp):
        """Compute the 'sc' request signature for ``tvid`` at ``timestamp``.

        Returns a 32-character hex string.

        NOTE(review): the initial state, the sin()-derived constants and the
        rotation table look like MD5's, so this is presumably an MD5 variant
        over a fixed-layout message -- confirm before modifying. The
        statement order and operator precedence are kept exactly as they
        were.
        """
        M = [1732584193, -271733879]
        M.extend([~M[0], ~M[1]])
        I_table = [7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21]
        C_base = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8388608, 432]

        def L(n, t):
            # Add n and t, with the result wrapped by trunc(); None counts as 0.
            if t is None:
                t = 0
            return trunc(((n >> 1) + (t >> 1) << 1) + (n & 1) + (t & 1))

        def trunc(n):
            # Wrap n into the signed 32-bit range.
            n = n % 0x100000000
            if n > 0x7fffffff:
                n -= 0x100000000
            return n

        def transform(string, mod):
            # Reads the loop variables ``i`` and ``a`` of the enclosing
            # function at call time.
            num = int(string, 16)
            return (num >> 8 * (i % 4) & 255 ^ i % mod) << ((a & 3) << 3)

        C = list(C_base)
        o = list(M)
        # Byte positions 0-12: the first 13 characters of (timestamp - 7).
        k = str(timestamp - 7)
        for i in range(13):
            a = i
            C[a >> 2] |= ord(k[a]) << 8 * (a % 4)

        # Byte positions 13-28: derived from a fixed hex string.
        for i in range(16):
            a = i + 13
            start = (i >> 2) * 8
            r = '03967743b643f66763d623d637e30733'
            C[a >> 2] |= transform(''.join(reversed(r[start:start + 8])), 7)

        # Byte positions 29-44: derived from a second fixed hex string.
        for i in range(16):
            a = i + 29
            start = (i >> 2) * 8
            r = '7038766939776a32776a32706b337139'
            C[a >> 2] |= transform(r[start:start + 8], 1)

        # Byte positions 45-53: up to nine characters of tvid.
        for i in range(9):
            a = i + 45
            if i < len(tvid):
                C[a >> 2] |= ord(tvid[i]) << 8 * (a % 4)

        # 64 mixing steps in four groups of 16 (I selects the group).
        for a in range(64):
            i = a
            I = i >> 4
            C_index = [i, 5 * i + 1, 3 * i + 5, 7 * i][I] % 16 + urshift(a, 6)
            m = L(L(o[0], [
                trunc(o[1] & o[2]) | trunc(~o[1] & o[3]),
                trunc(o[3] & o[1]) | trunc(~o[3] & o[2]),
                o[1] ^ o[2] ^ o[3],
                o[2] ^ trunc(o[1] | ~o[3])
            ][I]), L(
                trunc(int(abs(math.sin(i + 1)) * 4294967296)),
                # C has 15 words; index 15 is treated as 0 by L().
                C[C_index] if C_index < len(C) else None))
            I = I_table[4 * I + i % 4]
            o = [o[3],
                 L(o[1], trunc(trunc(m << I) | urshift(m, 32 - I))),
                 o[1],
                 o[2]]

        new_M = [L(o[0], M[0]), L(o[1], M[1]), L(o[2], M[2]), L(o[3], M[3])]
        # Split each state word into nibbles, then keep the low hex digit of
        # each resulting byte.
        s = [new_M[a >> 3] >> (1 ^ a & 7) * 4 & 15 for a in range(32)]
        return binascii.hexlify(intlist_to_bytes(s))[1::2].decode('ascii')

    def get_raw_data(self, tvid, video_id):
        """Fetch and return the parsed tmts JSON for the given video.

        The 'sc' signature and the 't' parameter are both based on the
        current time in milliseconds minus 7.
        """
        tm = int(time.time() * 1000)

        sc = self._gen_sc(tvid, tm)
        params = {
            'platForm': 'h5',
            'rate': 1,
            'tvid': tvid,
            'vid': video_id,
            'cupid': 'qc_100001_100186',
            'type': 'mp4',
            'nolimit': 0,
            'agenttype': 13,
            'src': 'd846d0c32d664d32b6b54ea48997a589',
            'sc': sc,
            't': tm - 7,
            '__jsT': None,
        }

        headers = {}
        cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
        if cn_verification_proxy:
            headers['Ytdl-request-proxy'] = cn_verification_proxy
        # The response is a JavaScript assignment; strip the prefix to get JSON.
        return self._download_json(
            'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id),
            video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='),
            query=params, headers=headers)

    def _extract_playlist(self, webpage):
        """Return a playlist result for an album page, or None otherwise.

        A page counts as an album when it contains at least one
        ``site-piclist_pic_link`` anchor. Further pages are fetched from the
        avlist endpoint until one holds fewer than PAGE_SIZE items.
        """
        PAGE_SIZE = 50

        links = re.findall(
            r'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"',
            webpage)
        if not links:
            return

        album_id = self._search_regex(
            r'albumId\s*:\s*(\d+),', webpage, 'album ID')
        album_title = self._search_regex(
            r'data-share-title="([^"]+)"', webpage, 'album title', fatal=False)

        entries = list(map(self.url_result, links))

        # Start from 2 because links in the first page are already on webpage
        for page_num in itertools.count(2):
            pagelist_page = self._download_webpage(
                'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id, page_num, PAGE_SIZE),
                album_id,
                note='Download playlist page %d' % page_num,
                errnote='Failed to download playlist page %d' % page_num)
            pagelist = self._parse_json(
                remove_start(pagelist_page, 'var tvInfoJs='), album_id)
            vlist = pagelist['data']['vlist']
            for item in vlist:
                entries.append(self.url_result(item['vurl']))
            # A short page means there is nothing left to fetch.
            if len(vlist) < PAGE_SIZE:
                break

        return self.playlist_result(entries, album_id, album_title)

    def _real_extract(self, url):
        """Extract a playlist or a single video from ``url``.

        Raises ExtractorError when the metadata endpoint reports an error
        code, or when it still has not returned usable play info after five
        attempts.
        """
        webpage = self._download_webpage(
            url, 'temp_id', note='download video page')

        # There's no simple way to determine whether an URL is a playlist or not
        # So detect it
        playlist_result = self._extract_playlist(webpage)
        if playlist_result:
            return playlist_result

        tvid = self._search_regex(
            r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
        video_id = self._search_regex(
            r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')

        for _ in range(5):
            raw_data = self.get_raw_data(tvid, video_id)

            if raw_data['code'] != 'A00000':
                if raw_data['code'] == 'A00111':
                    self.raise_geo_restricted()
                raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])

            data = raw_data['data']

            # iQiYi sometimes returns Ads
            if not isinstance(data['playInfo'], dict):
                self._sleep(5, video_id)
                continue

            title = data['playInfo']['an']
            break
        else:
            # Every attempt returned an ad payload; fail with a clear error
            # instead of hitting an unbound ``title`` below.
            raise ExtractorError(
                'Unable to extract video info: no usable play info after 5 attempts')

        return {
            'id': video_id,
            'title': title,
            'url': data['m3u'],
        }