]>
Commit | Line | Data |
---|---|---|
605ec701 | 1 | # coding: utf-8 |
605ec701 P |
2 | from __future__ import unicode_literals |
3 | ||
958d0b65 YCH |
4 | import hashlib |
5 | import math | |
8e0548e1 | 6 | import os |
958d0b65 | 7 | import random |
605ec701 | 8 | import time |
605ec701 | 9 | import uuid |
958d0b65 YCH |
10 | |
11 | from .common import InfoExtractor | |
8e0548e1 YCH |
12 | from ..compat import ( |
13 | compat_parse_qs, | |
14 | compat_urllib_parse, | |
15 | compat_urllib_parse_urlparse, | |
16 | ) | |
17 | from ..utils import ( | |
18 | ExtractorError, | |
19 | sanitized_Request, | |
20 | urlencode_postdata, | |
21 | url_basename, | |
22 | ) | |
605ec701 | 23 | |
f1da8610 | 24 | |
605ec701 P |
25 | class IqiyiIE(InfoExtractor): |
26 | IE_NAME = 'iqiyi' | |
44c514eb | 27 | IE_DESC = '爱奇艺' |
605ec701 | 28 | |
bee83e84 | 29 | _VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html' |
605ec701 | 30 | |
99481135 | 31 | _TESTS = [{ |
f1da8610 YCH |
32 | 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', |
33 | 'md5': '2cb594dc2781e6c941a110d8f358118b', | |
34 | 'info_dict': { | |
35 | 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', | |
36 | 'title': '美国德州空中惊现奇异云团 酷似UFO', | |
37 | 'ext': 'f4v', | |
38 | } | |
99481135 YCH |
39 | }, { |
40 | 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html', | |
41 | 'info_dict': { | |
42 | 'id': 'e3f585b550a280af23c98b6cb2be19fb', | |
43 | 'title': '名侦探柯南第752集', | |
44 | }, | |
45 | 'playlist': [{ | |
99481135 YCH |
46 | 'info_dict': { |
47 | 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1', | |
48 | 'ext': 'f4v', | |
49 | 'title': '名侦探柯南第752集', | |
50 | }, | |
51 | }, { | |
99481135 YCH |
52 | 'info_dict': { |
53 | 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2', | |
54 | 'ext': 'f4v', | |
55 | 'title': '名侦探柯南第752集', | |
56 | }, | |
57 | }, { | |
99481135 YCH |
58 | 'info_dict': { |
59 | 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3', | |
60 | 'ext': 'f4v', | |
61 | 'title': '名侦探柯南第752集', | |
62 | }, | |
63 | }, { | |
99481135 YCH |
64 | 'info_dict': { |
65 | 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4', | |
66 | 'ext': 'f4v', | |
67 | 'title': '名侦探柯南第752集', | |
68 | }, | |
69 | }, { | |
99481135 YCH |
70 | 'info_dict': { |
71 | 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5', | |
72 | 'ext': 'f4v', | |
73 | 'title': '名侦探柯南第752集', | |
74 | }, | |
75 | }, { | |
99481135 YCH |
76 | 'info_dict': { |
77 | 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6', | |
78 | 'ext': 'f4v', | |
79 | 'title': '名侦探柯南第752集', | |
80 | }, | |
81 | }, { | |
99481135 YCH |
82 | 'info_dict': { |
83 | 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7', | |
84 | 'ext': 'f4v', | |
85 | 'title': '名侦探柯南第752集', | |
86 | }, | |
87 | }, { | |
99481135 YCH |
88 | 'info_dict': { |
89 | 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8', | |
90 | 'ext': 'f4v', | |
91 | 'title': '名侦探柯南第752集', | |
92 | }, | |
93 | }], | |
c2d1be89 YCH |
94 | 'params': { |
95 | 'skip_download': True, | |
96 | }, | |
59185202 YCH |
97 | }, { |
98 | 'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html', | |
99 | 'only_matching': True, | |
100 | }, { | |
101 | 'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html', | |
102 | 'only_matching': True, | |
103 | }, { | |
104 | 'url': 'http://yule.iqiyi.com/pcb.html', | |
105 | 'only_matching': True, | |
8e0548e1 YCH |
106 | }, { |
107 | # VIP-only video. The first 2 parts (6 minutes) are available without login | |
108 | 'url': 'http://www.iqiyi.com/v_19rrny4w8w.html', | |
109 | 'info_dict': { | |
110 | 'id': 'f3cf468b39dddb30d676f89a91200dc1', | |
111 | 'title': '泰坦尼克号', | |
112 | }, | |
113 | 'playlist': [{ | |
114 | 'md5': '436bcde6e1307307d5ba1549715b0bbd', | |
115 | 'info_dict': { | |
116 | 'id': 'f3cf468b39dddb30d676f89a91200dc1_part1', | |
117 | 'ext': 'f4v', | |
118 | 'title': '泰坦尼克号', | |
119 | }, | |
120 | }, { | |
121 | 'md5': 'bfc5e332f4900fde547c69372385649e', | |
122 | 'info_dict': { | |
123 | 'id': 'f3cf468b39dddb30d676f89a91200dc1_part2', | |
124 | 'ext': 'f4v', | |
125 | 'title': '泰坦尼克号', | |
126 | }, | |
127 | }], | |
128 | 'expected_warnings': ['Needs a VIP account for full video'], | |
99481135 | 129 | }] |
605ec701 | 130 | |
08bb8ef2 YCH |
131 | _FORMATS_MAP = [ |
132 | ('1', 'h6'), | |
133 | ('2', 'h5'), | |
134 | ('3', 'h4'), | |
135 | ('4', 'h3'), | |
136 | ('5', 'h2'), | |
137 | ('10', 'h1'), | |
138 | ] | |
139 | ||
57565375 YCH |
@staticmethod
def md5_text(text):
    """Return the hexadecimal MD5 digest of ``text`` encoded as UTF-8."""
    digest = hashlib.md5()
    digest.update(text.encode('utf-8'))
    return digest.hexdigest()
143 | ||
8e0548e1 YCH |
def _authenticate_vip_video(self, api_video_url, video_id, tvid, _uuid, do_report_warning):
    """Ask the iQiyi VIP service to authorise one segment URL.

    POSTs the authentication form to ``ckn.action`` and returns the decoded
    JSON response. When the response code is ``Q00506`` (a VIP account is
    required), ``False`` is returned instead, after emitting a warning if
    ``do_report_warning`` is true.
    """
    segment_name = os.path.splitext(url_basename(api_video_url))[0]
    form = {
        # version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as
        'version': '2.0',
        'platform': 'b6c13e26323c537d',
        'aid': tvid,
        'tvid': tvid,
        'uid': '',
        'deviceId': _uuid,
        'playType': 'main',  # XXX: always main?
        'filename': segment_name,
    }

    # Forward every query parameter of the segment URL (first value only);
    # these may override the defaults above
    query = compat_urllib_parse_urlparse(api_video_url).query
    form.update(
        (name, values[0]) for name, values in compat_parse_qs(query).items())

    request = sanitized_Request(
        'http://api.vip.iqiyi.com/services/ckn.action',
        urlencode_postdata(form))
    # iQiyi server throws HTTP 405 error without the following header
    request.add_header('Content-Type', 'application/x-www-form-urlencoded')
    response = self._download_json(
        request, video_id,
        note='Downloading video authentication JSON',
        errnote='Unable to download video authentication JSON')

    if response['code'] != 'Q00506':
        return response

    # Q00506: requires a VIP account
    if do_report_warning:
        self.report_warning('Needs a VIP account for full video')
    return False
176 | ||
def construct_video_urls(self, data, video_id, _uuid, tvid):
    """Resolve the downloadable segment URLs for every supported format.

    ``data`` is read for ``data['vp']['tkl'][0]['vs']`` (the list of format
    items) and ``data['vp']['du']`` (the base URL). Returns a dict mapping a
    format id from ``_FORMATS_MAP`` to a list of ``(video_url, filesize)``
    tuples, one per resolved segment, in segment order.

    Format items whose ``bid`` is outside ``1..10`` or has no entry in
    ``_FORMATS_MAP`` are skipped. For a VIP segment that cannot be
    authenticated, the remaining segments of that format are dropped.
    """
    def do_xor(x, y):
        # The XOR mask is selected by the position modulo 3
        a = y % 3
        if a == 1:
            return x ^ 121
        if a == 2:
            return x ^ 72
        return x ^ 103

    def get_encode_code(encoded):
        # Decode an obfuscated path: '-'-separated hex codes, each XOR-ed
        # with a position-dependent mask; the decoded characters are then
        # reversed to obtain the final string
        codes = encoded.split('-')
        count = len(codes)
        chars = [
            chr(do_xor(int(code, 16), count - 1 - index))
            for index, code in enumerate(codes)]
        return ''.join(reversed(chars))

    def get_path_key(x, format_id, segment_index):
        mg = ')(*&^flash@#$%a'
        tm = self._download_json(
            'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id,
            note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
        )['t']
        # The key is derived from the server time bucketed by 600
        t = str(int(math.floor(int(tm) / (600.0))))
        return self.md5_text(t + mg + x)

    video_urls_dict = {}
    need_vip_warning_report = True
    for format_item in data['vp']['tkl'][0]['vs']:
        if not 0 < int(format_item['bid']) <= 10:
            continue
        format_id = self.get_format(format_item['bid'])
        if format_id is None:
            # A bid inside the accepted range but absent from _FORMATS_MAP
            # cannot be mapped back to a preference by the caller, so do
            # not spend requests on it
            continue

        video_urls = []

        video_urls_info = format_item['fs']
        if not format_item['fs'][0]['l'].startswith('/'):
            t = get_encode_code(format_item['fs'][0]['l'])
            if t.endswith('mp4'):
                video_urls_info = format_item['flvs']

        for segment_index, segment in enumerate(video_urls_info):
            vl = segment['l']
            if not vl.startswith('/'):
                vl = get_encode_code(vl)
            is_vip_video = '/vip/' in vl
            filesize = segment['b']
            base_url = data['vp']['du'].split('/')
            if not is_vip_video:
                # Non-VIP paths carry a time-based key as an extra path component
                key = get_path_key(
                    vl.split('/')[-1].split('.')[0], format_id, segment_index)
                base_url.insert(-1, key)
            base_url = '/'.join(base_url)
            param = {
                'su': _uuid,
                'qyid': uuid.uuid4().hex,
                'client': '',
                'z': '',
                'bt': '',
                'ct': '',
                'tn': str(int(time.time()))
            }
            api_video_url = base_url + vl
            if is_vip_video:
                api_video_url = api_video_url.replace('.f4v', '.hml')
                auth_result = self._authenticate_vip_video(
                    api_video_url, video_id, tvid, _uuid, need_vip_warning_report)
                if auth_result is False:
                    # Warn only once, then give up on the rest of this format
                    need_vip_warning_report = False
                    break
                param.update({
                    't': auth_result['data']['t'],
                    # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
                    'cid': 'afbe8fd3d73448c9',
                    'vid': video_id,
                    'QY00001': auth_result['data']['u'],
                })
            api_video_url += '?' if '?' not in api_video_url else '&'
            api_video_url += compat_urllib_parse.urlencode(param)
            js = self._download_json(
                api_video_url, video_id,
                note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
            video_url = js['l']
            video_urls.append(
                (video_url, filesize))

        video_urls_dict[format_id] = video_urls
    return video_urls_dict
268 | ||
def get_format(self, bid):
    """Return the format id paired with ``bid`` in ``_FORMATS_MAP``, or None."""
    wanted = str(bid)
    return next(
        (fmt for known_bid, fmt in self._FORMATS_MAP if known_bid == wanted),
        None)
670861bd P |
272 | |
def get_bid(self, format_id):
    """Return the bid paired with ``format_id`` in ``_FORMATS_MAP``, or None."""
    return next(
        (bid for bid, known_format in self._FORMATS_MAP if known_format == format_id),
        None)
605ec701 P |
276 | |
def get_raw_data(self, tvid, video_id, enc_key, _uuid):
    """Query the ``vms`` endpoint and return its decoded JSON response.

    The request is signed with ``enc``, the MD5 of ``enc_key`` followed by
    the current Unix timestamp and ``tvid``; ``authkey`` is built the same
    way from the MD5 of the empty string.
    """
    timestamp = str(int(time.time()))
    suffix = timestamp + tvid
    query = compat_urllib_parse.urlencode({
        'key': 'fvip',
        'src': self.md5_text('youtube-dl'),
        'tvId': tvid,
        'vid': video_id,
        'vinfo': 1,
        'tm': timestamp,
        'enc': self.md5_text(enc_key + suffix),
        'qyid': _uuid,
        'tn': random.random(),
        'um': 0,
        'authkey': self.md5_text(self.md5_text('') + suffix),
        'k_tag': 1,
    })

    return self._download_json(
        'http://cache.video.qiyi.com/vms' + '?' + query, video_id)
299 | ||
def get_enc_key(self, swf_url, video_id):
    """Return the hard-coded key used to sign ``vms`` requests.

    ``swf_url`` and ``video_id`` are accepted but not used.
    """
    # TODO: automatic key extraction
    # last update at 2016-01-22 for Zombie::bite
    return '6ab6d0280511493ba85594779759d4ed'
305 | ||
306 | def _real_extract(self, url): | |
307 | webpage = self._download_webpage( | |
308 | url, 'temp_id', note='download video page') | |
309 | tvid = self._search_regex( | |
29e7e078 | 310 | r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') |
605ec701 | 311 | video_id = self._search_regex( |
29e7e078 | 312 | r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') |
605ec701 | 313 | swf_url = self._search_regex( |
9c5f685e | 314 | r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL') |
605ec701 P |
315 | _uuid = uuid.uuid4().hex |
316 | ||
317 | enc_key = self.get_enc_key(swf_url, video_id) | |
318 | ||
319 | raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid) | |
aacda28b YCH |
320 | |
321 | if raw_data['code'] != 'A000000': | |
322 | raise ExtractorError('Unable to load data. Error code: ' + raw_data['code']) | |
323 | ||
605ec701 P |
324 | data = raw_data['data'] |
325 | ||
326 | title = data['vi']['vn'] | |
327 | ||
328 | # generate video_urls_dict | |
670861bd | 329 | video_urls_dict = self.construct_video_urls( |
8e0548e1 | 330 | data, video_id, _uuid, tvid) |
605ec701 P |
331 | |
332 | # construct info | |
333 | entries = [] | |
334 | for format_id in video_urls_dict: | |
335 | video_urls = video_urls_dict[format_id] | |
336 | for i, video_url_info in enumerate(video_urls): | |
f1da8610 | 337 | if len(entries) < i + 1: |
605ec701 P |
338 | entries.append({'formats': []}) |
339 | entries[i]['formats'].append( | |
340 | { | |
341 | 'url': video_url_info[0], | |
342 | 'filesize': video_url_info[-1], | |
343 | 'format_id': format_id, | |
670861bd | 344 | 'preference': int(self.get_bid(format_id)) |
605ec701 P |
345 | } |
346 | ) | |
347 | ||
348 | for i in range(len(entries)): | |
670861bd | 349 | self._sort_formats(entries[i]['formats']) |
605ec701 P |
350 | entries[i].update( |
351 | { | |
c4ee8702 | 352 | 'id': '%s_part%d' % (video_id, i + 1), |
605ec701 P |
353 | 'title': title, |
354 | } | |
355 | ) | |
356 | ||
357 | if len(entries) > 1: | |
358 | info = { | |
359 | '_type': 'multi_video', | |
360 | 'id': video_id, | |
361 | 'title': title, | |
362 | 'entries': entries, | |
363 | } | |
364 | else: | |
365 | info = entries[0] | |
366 | info['id'] = video_id | |
367 | info['title'] = title | |
368 | ||
369 | return info |