]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import base64 | |
5 | import random | |
6 | import string | |
7 | import time | |
8 | ||
9 | from .common import InfoExtractor | |
10 | from ..compat import ( | |
11 | compat_urllib_parse, | |
12 | compat_ord, | |
13 | ) | |
14 | from ..utils import ( | |
15 | ExtractorError, | |
16 | sanitized_Request, | |
17 | ) | |
18 | ||
19 | ||
class YoukuIE(InfoExtractor):
    """Extractor for Youku videos.

    A video is returned as a ``multi_video`` result with one entry per
    segment ("part"); every entry lists the formats that provide that part.
    """

    IE_NAME = 'youku'
    IE_DESC = '优酷'
    _VALID_URL = r'''(?x)
        (?:
            http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
            youku:)
        (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
    '''

    _TESTS = [{
        # MD5 is unstable
        'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
        'info_dict': {
            'id': 'XMTc1ODE5Njcy_part1',
            'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
            'ext': 'flv'
        }
    }, {
        'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
        'only_matching': True,
    }, {
        'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
        'info_dict': {
            'id': 'XODgxNjg1Mzk2',
            'title': '武媚娘传奇 85',
        },
        'playlist_count': 11,
        'skip': 'Available in China only',
    }, {
        'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
        'info_dict': {
            'id': 'XMTI1OTczNDM5Mg',
            'title': '花千骨 04',
        },
        'playlist_count': 13,
    }, {
        'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
        'note': 'Video protected with password',
        'info_dict': {
            'id': 'XNjA1NzA2Njgw',
            'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
        },
        'playlist_count': 19,
        'params': {
            'videopassword': '100600',
        },
    }]

    # stream_type -> value sent as the 'hd' query parameter
    _HD_IDS = {
        '3gp': '0',
        '3gphd': '1',
        'flv': '0',
        'flvhd': '0',
        'mp4': '1',
        'mp4hd': '1',
        'mp4hd2': '1',
        'mp4hd3': '1',
        'hd2': '2',
        'hd3': '3',
    }

    # stream_type -> container name, used both in the '/st/' URL component
    # and as the 'ext' of the resulting format
    _EXTENSIONS = {
        '3gp': 'flv',
        '3gphd': 'mp4',
        'flv': 'flv',
        'flvhd': 'flv',
        'mp4': 'mp4',
        'mp4hd': 'mp4',
        'mp4hd2': 'flv',
        'mp4hd3': 'flv',
        'hd2': 'flv',
        'hd3': 'flv',
    }

    # stream_type -> format_id reported for the format
    _FORMAT_NAMES = {
        '3gp': 'h6',
        '3gphd': 'h5',
        'flv': 'h4',
        'flvhd': 'h4',
        'mp4': 'h3',
        'mp4hd': 'h3',
        'mp4hd2': 'h4',
        'mp4hd3': 'h4',
        'hd2': 'h2',
        'hd3': 'h1',
    }

    def construct_video_urls(self, data):
        """Build the download URLs for every stream described in *data*.

        *data* is the ``data`` object of the play/get.json response; the keys
        read here are ``security.encrypt_string``, ``security.ip`` and, for
        each item of ``stream``, ``stream_type``, ``stream_fileid`` and
        ``segs[*].key``.

        Returns a dict mapping each stream type to the list of its segment
        URLs, in segment order.  Raises KeyError for a stream type that is
        missing from the lookup tables (only if that stream has segments).
        """
        def yk_t(key, payload):
            # RC4: key scheduling over a 256-entry state, then XOR of the
            # payload with the generated keystream.
            state = list(range(256))
            t = 0
            for i in range(256):
                t = (t + state[i] + compat_ord(key[i % len(key)])) % 256
                state[i], state[t] = state[t], state[i]
            out = bytearray()
            x, y = 0, 0
            for byte in payload:
                y = (y + 1) % 256
                x = (x + state[y]) % 256
                state[x], state[y] = state[y], state[x]
                out.append(compat_ord(byte) ^ state[(state[x] + state[y]) % 256])
            return bytes(out)

        security = data['security']

        # get sid, token
        sid, token = yk_t(
            b'becaf9be',
            base64.b64decode(security['encrypt_string'].encode('ascii'))
        ).decode('ascii').split('_')

        # get oip
        oip = security['ip']

        def get_fileid(template, n):
            # Characters 8-9 of the stream's file id are replaced with the
            # segment number as (at least) two upper-case hex digits.
            return template[:8] + '%02X' % n + template[10:]

        # get ep
        def generate_ep(fileid):
            ep_t = yk_t(
                b'bf7e5f01',
                ('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
            )
            return base64.b64encode(ep_t).decode('ascii')

        # generate video_urls
        video_urls_dict = {}
        for stream in data['stream']:
            stream_type = stream.get('stream_type')
            fileid_template = stream['stream_fileid']
            video_urls = []
            # enumerate() yields the segment's real position; looking it up
            # with list.index() is quadratic and returns the wrong number
            # when two segment dicts compare equal.
            for n, seg in enumerate(stream['segs']):
                fileid = get_fileid(fileid_template, n)
                param = {
                    'K': seg['key'],
                    'hd': self.get_hd(stream_type),
                    'myp': 0,
                    'ypp': 0,
                    'ctype': 12,
                    'ev': 1,
                    'token': token,
                    'oip': oip,
                    'ep': generate_ep(fileid),
                }
                video_urls.append(
                    'http://k.youku.com/player/getFlvPath/sid/%s_00/st/%s/fileid/%s?%s' % (
                        sid, self.parse_ext_l(stream_type), fileid,
                        compat_urllib_parse.urlencode(param)))
            video_urls_dict[stream_type] = video_urls

        return video_urls_dict

    @staticmethod
    def get_ysuid():
        """Return a value for the ``__ysuid`` cookie.

        The value is the current Unix time in whole seconds followed by
        three random ASCII letters.
        """
        return '%d%s' % (int(time.time()), ''.join(
            random.choice(string.ascii_letters) for _ in range(3)))

    def get_hd(self, fm):
        """Return the 'hd' query parameter value for stream type *fm*.

        Raises KeyError if *fm* is not a known stream type.
        """
        return self._HD_IDS[fm]

    def parse_ext_l(self, fm):
        """Return the container name ('flv' or 'mp4') for stream type *fm*.

        Raises KeyError if *fm* is not a known stream type.
        """
        return self._EXTENSIONS[fm]

    def get_format_name(self, fm):
        """Return the format_id ('h1' .. 'h6') for stream type *fm*.

        Raises KeyError if *fm* is not a known stream type.
        """
        return self._FORMAT_NAMES[fm]

    def _real_extract(self, url):
        """Extract *url* into a ``multi_video`` info dict.

        Raises ExtractorError when the server reports an error or returns no
        streams.
        """
        video_id = self._match_id(url)

        self._set_cookie('youku.com', '__ysuid', self.get_ysuid())

        def retrieve_data(req_url, note):
            # The request carries its own URL as Referer plus an 'xreferrer'
            # cookie.
            self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
            req = sanitized_Request(req_url, headers={'Referer': req_url})

            cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
            if cn_verification_proxy:
                req.add_header('Ytdl-request-proxy', cn_verification_proxy)

            raw_data = self._download_json(req, video_id, note=note)
            return raw_data['data']

        video_password = self._downloader.params.get('videopassword')

        # request basic data
        basic_data_url = 'http://play.youku.com/play/get.json?vid=%s&ct=12' % video_id
        if video_password:
            # NOTE(review): the password is interpolated without URL
            # escaping; characters such as '&' or '#' would corrupt the query.
            basic_data_url += '&pwd=%s' % video_password

        data = retrieve_data(basic_data_url, 'Downloading JSON metadata')

        error = data.get('error')
        if error:
            error_note = error.get('note')
            if error_note is not None and '因版权原因无法观看此视频' in error_note:
                raise ExtractorError(
                    'Youku said: Sorry, this video is available in China only', expected=True)
            # %s rather than %i: a missing or non-numeric code must not turn
            # the server's error report into a TypeError.
            msg = 'Youku server reported error %s' % error.get('code')
            if error_note is not None:
                msg += ': ' + error_note
            raise ExtractorError(msg)

        # get video title
        title = data['video']['title']

        streams = data.get('stream')
        if not streams:
            # Without this guard max() below fails with a bare ValueError.
            raise ExtractorError('Youku returned no streams for this video')

        # generate video_urls_dict
        video_urls_dict = self.construct_video_urls(data)

        # construct info
        # some formats are not available for all parts, so create one entry
        # per part of the longest stream and let each stream fill in the
        # parts it actually has
        part_count = max(len(stream['segs']) for stream in streams)
        entries = [{
            'id': '%s_part%d' % (video_id, part + 1),
            'title': title,
            'formats': [],
        } for part in range(part_count)]

        for stream in streams:
            fm = stream.get('stream_type')
            video_urls = video_urls_dict[fm]
            for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
                entry['formats'].append({
                    'url': video_url,
                    'format_id': self.get_format_name(fm),
                    'ext': self.parse_ext_l(fm),
                    'filesize': int(seg['size']),
                })

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': title,
            'entries': entries,
        }