]>
Commit | Line | Data |
---|---|---|
ddbd9035 | 1 | # coding: utf-8 |
8a32b82e PH |
2 | from __future__ import unicode_literals |
3 | ||
f9355dc9 | 4 | import base64 |
9c286cfa PH |
5 | |
6 | from .common import InfoExtractor | |
f9355dc9 | 7 | from ..utils import ExtractorError |
9c286cfa | 8 | |
c203be3f YCH |
9 | from ..compat import ( |
10 | compat_urllib_parse, | |
11 | compat_ord, | |
5228b756 | 12 | compat_urllib_request, |
c203be3f | 13 | ) |
1498940b | 14 | |
aed473cc | 15 | |
class YoukuIE(InfoExtractor):
    """Extractor for Youku videos.

    Two JSON documents are fetched from the ``getPlayList`` endpoint and
    combined to build one download URL per (stream type, segment) pair.
    The result is a ``multi_video`` whose entries are the video's parts.
    """

    IE_NAME = 'youku'
    IE_DESC = '优酷'
    _VALID_URL = r'''(?x)
        (?:
            http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
            youku:)
        (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
    '''

    _TESTS = [{
        'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
        'md5': '5f3af4192eabacc4501508d54a8cabd7',
        'info_dict': {
            'id': 'XMTc1ODE5Njcy_part1',
            'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
            'ext': 'flv'
        }
    }, {
        'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
        'only_matching': True,
    }, {
        'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
        'info_dict': {
            'id': 'XODgxNjg1Mzk2',
            'title': '武媚娘传奇 85',
        },
        'playlist_count': 11,
    }, {
        'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
        'info_dict': {
            'id': 'XMTI1OTczNDM5Mg',
            'title': '花千骨 04',
        },
        'playlist_count': 13,
        'skip': 'Available in China only',
    }]

    # Stream type -> value of the 'hd' query parameter.
    _HD_IDS = {
        'flv': '0',
        'mp4': '1',
        'hd2': '2',
        'hd3': '3',
        '3gp': '0',
        '3gphd': '1',
    }

    # Stream type -> container extension (also used as the '/st/' URL part).
    _EXTENSIONS = {
        'flv': 'flv',
        'mp4': 'mp4',
        'hd2': 'flv',
        'hd3': 'flv',
        '3gp': 'flv',
        '3gphd': 'mp4',
    }

    # Stream type -> format_id reported for the format.
    _FORMAT_NAMES = {
        '3gp': 'h6',
        '3gphd': 'h5',
        'flv': 'h4',
        'mp4': 'h3',
        'hd2': 'h2',
        'hd3': 'h1',
    }

    def construct_video_urls(self, data1, data2):
        """Build the segment download URLs for every stream type.

        data1 -- dict providing 'seed', 'streamtypes', 'streamfileids'
                 and 'segs' (each segment providing 'no', 'k', 'seconds').
        data2 -- dict providing 'ep' (base64 text) and 'ip'.

        Returns a dict mapping each stream type in data1['streamtypes'] to
        the list of URLs of its segments, in the order of data1['segs'].
        Raises KeyError for a stream type missing from the lookup tables.
        """
        def yk_t(s1, s2):
            # RC4: schedule the key s1, then XOR s2 with the keystream.
            ls = list(range(256))
            key_len = len(s1)
            t = 0
            for i in range(256):
                t = (t + ls[i] + compat_ord(s1[i % key_len])) % 256
                ls[i], ls[t] = ls[t], ls[i]
            out = bytearray()
            x, y = 0, 0
            for ch in s2:
                y = (y + 1) % 256
                x = (x + ls[y]) % 256
                ls[x], ls[y] = ls[y], ls[x]
                out.append(compat_ord(ch) ^ ls[(ls[x] + ls[y]) % 256])
            return bytes(out)

        # get sid, token
        sid, token = yk_t(
            b'becaf9be', base64.b64decode(data2['ep'].encode('ascii'))
        ).decode('ascii').split('_')

        # get oip
        oip = data2['ip']

        # get fileid: shuffle the alphabet with a seeded linear congruential
        # generator; streamfileids are '*'-separated indices into the result.
        string_ls = list(
            'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890')
        shuffled_string_ls = []
        seed = data1['seed']
        for _ in range(len(string_ls)):
            seed = (seed * 0xd3 + 0x754f) % 0x10000
            idx = seed * len(string_ls) // 0x10000
            shuffled_string_ls.append(string_ls.pop(idx))

        fileid_templates = {}
        for stream_type in data1['streamtypes']:
            indices = [
                int(i)
                for i in data1['streamfileids'][stream_type].strip('*').split('*')]
            fileid = ''.join(shuffled_string_ls[i] for i in indices)
            # Characters 8-9 are replaced by the segment number per segment.
            fileid_templates[stream_type] = fileid[:8] + '%s' + fileid[10:]

        def get_fileid(stream_type, seg_no):
            # Segment number as two upper-case hex digits.
            return fileid_templates[stream_type] % ('%02X' % seg_no)

        # get ep
        def generate_ep(fileid):
            ep_t = yk_t(
                b'bf7e5f01',
                ('%s_%s_%s' % (sid, fileid, token)).encode('ascii'))
            return base64.b64encode(ep_t).decode('ascii')

        # generate video_urls
        video_urls_dict = {}
        for stream_type in data1['streamtypes']:
            video_urls = []
            for dt in data1['segs'][stream_type]:
                seg_no = int(dt['no'])
                fileid = get_fileid(stream_type, seg_no)
                param = {
                    'K': dt['k'],
                    'hd': self.get_hd(stream_type),
                    'myp': 0,
                    'ts': dt['seconds'],
                    'ypp': 0,
                    'ctype': 12,
                    'ev': 1,
                    'token': token,
                    'oip': oip,
                    'ep': generate_ep(fileid),
                }
                video_url = (
                    'http://k.youku.com/player/getFlvPath/' +
                    'sid/' + sid +
                    '_' + str(seg_no + 1).zfill(2) +
                    '/st/' + self.parse_ext_l(stream_type) +
                    '/fileid/' + fileid + '?' +
                    compat_urllib_parse.urlencode(param))
                video_urls.append(video_url)
            video_urls_dict[stream_type] = video_urls

        return video_urls_dict

    def get_hd(self, fm):
        """Return the 'hd' query value for stream type fm (KeyError if unknown)."""
        return self._HD_IDS[fm]

    def parse_ext_l(self, fm):
        """Return the file extension for stream type fm (KeyError if unknown)."""
        return self._EXTENSIONS[fm]

    def get_format_name(self, fm):
        """Return the format_id for stream type fm (KeyError if unknown)."""
        return self._FORMAT_NAMES[fm]

    def _real_extract(self, url):
        """Extract a multi_video result (one entry per part) from url.

        Raises ExtractorError when the server reports an error or returns
        no segments.
        """
        video_id = self._match_id(url)

        def retrieve_data(req_url, note):
            req = compat_urllib_request.Request(req_url)

            cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
            if cn_verification_proxy:
                req.add_header('Ytdl-request-proxy', cn_verification_proxy)

            raw_data = self._download_json(req, video_id, note=note)
            return raw_data['data'][0]

        # request basic data
        data1 = retrieve_data(
            'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id,
            'Downloading JSON metadata 1')

        # Report a server-side error before issuing the second request, so a
        # failure of that request cannot hide the real reason.
        error_code = data1.get('error_code')
        if error_code:
            error = data1.get('error')
            if error is not None and '因版权原因无法观看此视频' in error:
                raise ExtractorError(
                    'Youku said: Sorry, this video is available in China only', expected=True)
            # %s rather than %i: the code is not guaranteed to be numeric.
            msg = 'Youku server reported error %s' % error_code
            if error is not None:
                msg += ': ' + error
            raise ExtractorError(msg)

        data2 = retrieve_data(
            'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id,
            'Downloading JSON metadata 2')

        title = data1['title']

        segs = data1['segs']
        if not segs:
            raise ExtractorError('Youku returned no video segments')

        # generate video_urls_dict
        video_urls_dict = self.construct_video_urls(data1, data2)

        # construct info: one entry per part, sized by the longest format
        part_count = max(len(v) for v in segs.values())
        entries = [{
            'id': '%s_part%d' % (video_id, i + 1),
            'title': title,
            'formats': [],
        } for i in range(part_count)]

        for fm in data1['streamtypes']:
            video_urls = video_urls_dict[fm]
            # Some formats are not available for all parts; zip() stops at
            # the last part a format provides, so later entries simply do
            # not get that format.
            for video_url, seg, entry in zip(video_urls, segs[fm], entries):
                entry['formats'].append({
                    'url': video_url,
                    'format_id': self.get_format_name(fm),
                    'ext': self.parse_ext_l(fm),
                    'filesize': int(seg['size']),
                })

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': title,
            'entries': entries,
        }