]>
Commit | Line | Data |
---|---|---|
605ec701 | 1 | # coding: utf-8 |
605ec701 P |
2 | from __future__ import unicode_literals |
3 | ||
958d0b65 YCH |
4 | import hashlib |
5 | import math | |
6 | import random | |
605ec701 P |
7 | import re |
8 | import time | |
605ec701 | 9 | import uuid |
605ec701 | 10 | import zlib |
958d0b65 YCH |
11 | |
12 | from .common import InfoExtractor | |
13 | from ..compat import compat_urllib_parse | |
14 | from ..utils import ExtractorError | |
605ec701 | 15 | |
f1da8610 | 16 | |
class IqiyiIE(InfoExtractor):
    """Information extractor for video pages on iqiyi.com.

    Extraction flow, as implemented below:

    1. download the video page and read the tvid, the video id and the
       URL of the SWF player from it;
    2. download the SWF player and read the ``enc`` key embedded in it;
    3. query the VMS API (``cache.video.qiyi.com/vms``) for the video data;
    4. resolve the final URL of every segment of every known format.
    """

    IE_NAME = 'iqiyi'

    # The dot of the host name is escaped so it only matches a literal '.',
    # and the 'www.' prefix is optional.
    _VALID_URL = r'http://(?:www\.)?iqiyi\.com/.+?\.html'

    _TEST = {
        'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
        'md5': '2cb594dc2781e6c941a110d8f358118b',
        'info_dict': {
            'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
            'title': '美国德州空中惊现奇异云团 酷似UFO',
            'ext': 'f4v',
        }
    }

    def construct_video_urls(self, data, video_id, _uuid):
        """Resolve the segment URLs of every known format.

        :param data: the ``data`` object of the VMS API response. The keys
            read here are ``data['vp']['tkl'][0]['vs']`` (list of formats)
            and ``data['vp']['du']`` (base URL of the segment API).
        :param video_id: video id, used for the download notes.
        :param _uuid: hex uuid sent as the ``su`` parameter of every
            segment request.
        :return: dict mapping a format id (see :meth:`get_format`) to a
            list of ``(video_url, filesize)`` tuples in segment order.
        """
        def do_xor(x, y):
            # The xor key is selected by the position modulo 3.
            a = y % 3
            if a == 1:
                return x ^ 121
            if a == 2:
                return x ^ 72
            return x ^ 103

        def get_encode_code(l):
            # ``l`` is a '-' separated list of hex numbers. The item at
            # index j is xor-ed with the key for position (count - 1 - j)
            # and the resulting characters are joined in reverse order.
            b = l.split('-')
            c = len(b)
            s = ''
            for i in range(c - 1, -1, -1):
                s += chr(do_xor(int(b[c - i - 1], 16), i))
            return s[::-1]

        def get_path_key(x, format_id, segment_index):
            # The key is the md5 of <server time / 600><salt><file name>,
            # where the server time comes from data.video.qiyi.com.
            mg = ')(*&^flash@#$%a'
            tm = self._download_json(
                'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id,
                note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
            )['t']
            t = str(int(math.floor(int(tm) / (600.0))))
            return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()

        video_urls_dict = {}
        for format_item in data['vp']['tkl'][0]['vs']:
            if not 0 < int(format_item['bid']) <= 10:
                continue
            format_id = self.get_format(format_item['bid'])
            if format_id is None:
                # Bids inside the accepted range that get_format() does not
                # know would end up under the key None and later break
                # int(self.get_bid(None)) in _real_extract(); skip them.
                continue

            video_urls = []

            # Use the 'fs' segment list unless its first (encoded) path
            # decodes to an mp4 file, in which case 'flvs' is used instead.
            video_urls_info = format_item['fs']
            if not format_item['fs'][0]['l'].startswith('/'):
                t = get_encode_code(format_item['fs'][0]['l'])
                if t.endswith('mp4'):
                    video_urls_info = format_item['flvs']

            for segment_index, segment in enumerate(video_urls_info):
                vl = segment['l']
                # Paths that do not start with '/' are encoded.
                if not vl.startswith('/'):
                    vl = get_encode_code(vl)
                key = get_path_key(
                    vl.split('/')[-1].split('.')[0], format_id, segment_index)
                filesize = segment['b']
                # Insert the key before the last component of the base URL.
                base_url = data['vp']['du'].split('/')
                base_url.insert(-1, key)
                base_url = '/'.join(base_url)
                param = {
                    'su': _uuid,
                    'qyid': uuid.uuid4().hex,
                    'client': '',
                    'z': '',
                    'bt': '',
                    'ct': '',
                    'tn': str(int(time.time()))
                }
                api_video_url = base_url + vl + '?' + \
                    compat_urllib_parse.urlencode(param)
                js = self._download_json(
                    api_video_url, video_id,
                    note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
                video_url = js['l']
                video_urls.append(
                    (video_url, filesize))

            video_urls_dict[format_id] = video_urls
        return video_urls_dict

    def get_format(self, bid):
        """Return the format id for a bid, or None if the bid is unknown.

        :param bid: bid as found in the API data; converted with ``str()``
            before the lookup.
        """
        _dict = {
            '1': 'h6',
            '2': 'h5',
            '3': 'h4',
            '4': 'h3',
            '5': 'h2',
            '10': 'h1'
        }
        return _dict.get(str(bid), None)

    def get_bid(self, format_id):
        """Return the bid (as a string) for a format id, or None if unknown.

        The special format id ``'best'`` maps to ``'best'``.
        """
        _dict = {
            'h6': '1',
            'h5': '2',
            'h4': '3',
            'h3': '4',
            'h2': '5',
            'h1': '10',
            'best': 'best'
        }
        return _dict.get(format_id, None)

    def get_raw_data(self, tvid, video_id, enc_key, _uuid):
        """Query the VMS API and return the decoded JSON response.

        :param tvid: tvid read from the video page (string).
        :param video_id: video id read from the video page.
        :param enc_key: key read from the SWF player, see
            :meth:`get_enc_key`.
        :param _uuid: hex uuid sent as the ``qyid`` parameter.
        :return: the parsed JSON document returned by the API.
        """
        tm = str(int(time.time()))
        param = {
            'key': 'fvip',
            'src': hashlib.md5(b'youtube-dl').hexdigest(),
            'tvId': tvid,
            'vid': video_id,
            'vinfo': 1,
            'tm': tm,
            'enc': hashlib.md5(
                (enc_key + tm + tvid).encode('utf8')).hexdigest(),
            'qyid': _uuid,
            'tn': random.random(),
            'um': 0,
            'authkey': hashlib.md5(
                (tm + tvid).encode('utf8')).hexdigest()
        }

        api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
            compat_urllib_parse.urlencode(param)
        raw_data = self._download_json(api_url, video_id)
        return raw_data

    def get_enc_key(self, swf_url, video_id):
        """Download the SWF player and return the enc key embedded in it.

        :param swf_url: URL of the SWF player.
        :param video_id: video id, used for the download note.
        :return: the key as a text string.
        """
        req = self._request_webpage(
            swf_url, video_id, note='download swf content')
        cn = req.read()
        # The first 8 bytes are skipped and the rest is zlib-decompressed
        # (presumably the 8 bytes are the SWF header -- not verified here).
        cn = zlib.decompress(cn[8:])
        # '\\$' produces the same bytes as the former '\$' without relying
        # on an invalid escape sequence in a non-raw bytes literal.
        pt = re.compile(b'MixerRemote\x08(?P<enc_key>.+?)\\$&vv')
        enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8')
        return enc_key

    def _real_extract(self, url):
        """Extract the video information for ``url``.

        :return: a ``multi_video`` result when the video has more than one
            segment, otherwise the info dict of the single segment.
        :raises ExtractorError: when the API reports an error code, when
            the API returns no track list, or when no format yields any
            segment.
        """
        webpage = self._download_webpage(
            url, 'temp_id', note='download video page')
        tvid = self._search_regex(
            r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
        video_id = self._search_regex(
            r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
        swf_url = self._search_regex(
            r'(http://.+?MainPlayer.+?\.swf)', webpage, 'swf player URL')
        _uuid = uuid.uuid4().hex

        enc_key = self.get_enc_key(swf_url, video_id)

        raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)

        if raw_data['code'] != 'A000000':
            raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])

        if not raw_data['data']['vp']['tkl']:
            raise ExtractorError('iQiyi VIP videos are not supported')

        data = raw_data['data']

        title = data['vi']['vn']

        # generate video_urls_dict
        video_urls_dict = self.construct_video_urls(
            data, video_id, _uuid)

        # construct info: entry i collects segment i of every format
        entries = []
        for format_id in video_urls_dict:
            video_urls = video_urls_dict[format_id]
            for i, video_url_info in enumerate(video_urls):
                if len(entries) < i + 1:
                    entries.append({'formats': []})
                entries[i]['formats'].append(
                    {
                        'url': video_url_info[0],
                        'filesize': video_url_info[-1],
                        'format_id': format_id,
                        'preference': int(self.get_bid(format_id))
                    }
                )

        if not entries:
            # Without this guard the code below fails with a bare
            # IndexError on entries[0].
            raise ExtractorError('No video formats found')

        for i, entry in enumerate(entries):
            self._sort_formats(entry['formats'])
            entry.update(
                {
                    'id': '%s_part%d' % (video_id, i + 1),
                    'title': title,
                }
            )

        if len(entries) > 1:
            info = {
                '_type': 'multi_video',
                'id': video_id,
                'title': title,
                'entries': entries,
            }
        else:
            info = entries[0]
            info['id'] = video_id
            info['title'] = title

        return info