]>
Commit | Line | Data |
---|---|---|
ddbd9035 | 1 | # coding: utf-8 |
8a32b82e PH |
2 | from __future__ import unicode_literals |
3 | ||
f9355dc9 | 4 | import base64 |
034caf70 YCH |
5 | import random |
6 | import string | |
7 | import time | |
9c286cfa PH |
8 | |
9 | from .common import InfoExtractor | |
c203be3f YCH |
10 | from ..compat import ( |
11 | compat_urllib_parse, | |
12 | compat_ord, | |
5c2266df S |
13 | ) |
14 | from ..utils import ( | |
15 | ExtractorError, | |
16 | sanitized_Request, | |
c203be3f | 17 | ) |
1498940b | 18 | |
aed473cc | 19 | |
class YoukuIE(InfoExtractor):
    """Extractor for Youku videos.

    Accepts ``v.youku.com/v_show/id_<id>`` pages, ``player.youku.com``
    player URLs and the ``youku:<id>`` shorthand (see ``_VALID_URL``).
    A video is returned as a ``multi_video`` result with one entry per
    segment.
    """

    IE_NAME = 'youku'
    IE_DESC = '优酷'
    _VALID_URL = r'''(?x)
        (?:
            http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
            youku:)
        (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
    '''

    _TESTS = [{
        # MD5 is unstable
        'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
        'info_dict': {
            'id': 'XMTc1ODE5Njcy_part1',
            'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
            'ext': 'flv'
        }
    }, {
        'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
        'only_matching': True,
    }, {
        'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
        'info_dict': {
            'id': 'XODgxNjg1Mzk2',
            'title': '武媚娘传奇 85',
        },
        'playlist_count': 11,
        'skip': 'Available in China only',
    }, {
        'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
        'info_dict': {
            'id': 'XMTI1OTczNDM5Mg',
            'title': '花千骨 04',
        },
        'playlist_count': 13,
    }, {
        'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
        'note': 'Video protected with password',
        'info_dict': {
            'id': 'XNjA1NzA2Njgw',
            'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
        },
        'playlist_count': 19,
        'params': {
            'videopassword': '100600',
        },
    }]

    # Per-stream-type lookup tables, keyed by the ``stream_type`` value found
    # in the JSON response. Kept at class level so they are built only once.

    # Value sent as the ``hd`` query parameter of the segment URL.
    _HD_IDS = {
        '3gp': '0',
        '3gphd': '1',
        'flv': '0',
        'flvhd': '0',
        'mp4': '1',
        'mp4hd': '1',
        'mp4hd2': '1',
        'mp4hd3': '1',
        'hd2': '2',
        'hd3': '3',
    }

    # Container extension, used both in the ``/st/`` URL component and as the
    # ``ext`` of the resulting format.
    _EXTENSIONS = {
        '3gp': 'flv',
        '3gphd': 'mp4',
        'flv': 'flv',
        'flvhd': 'flv',
        'mp4': 'mp4',
        'mp4hd': 'mp4',
        'mp4hd2': 'flv',
        'mp4hd3': 'flv',
        'hd2': 'flv',
        'hd3': 'flv',
    }

    # ``format_id`` reported for each stream type.
    _FORMAT_NAMES = {
        '3gp': 'h6',
        '3gphd': 'h5',
        'flv': 'h4',
        'flvhd': 'h4',
        'mp4': 'h3',
        'mp4hd': 'h3',
        'mp4hd2': 'h4',
        'mp4hd3': 'h4',
        'hd2': 'h2',
        'hd3': 'h1',
    }

    def construct_video_urls(self, data):
        """Build the segment download URLs for every stream in ``data``.

        ``data`` is the ``data`` member of the play/get.json response. This
        method reads ``data['security']`` (``encrypt_string`` and ``ip``) and
        ``data['stream']``; each stream must provide ``stream_fileid`` and
        ``segs``, and each segment must provide ``key``.

        Returns a dict mapping each stream type to the list of its segment
        URLs, in segment order. Raises ``KeyError`` for a stream type that is
        missing from the lookup tables.
        """
        def yk_t(s1, s2):
            # RC4: run the key schedule with s1, then XOR s2 with the
            # generated keystream.
            ls = list(range(256))
            t = 0
            for i in range(256):
                t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256
                ls[i], ls[t] = ls[t], ls[i]
            s = bytearray()
            x, y = 0, 0
            for ch in s2:
                y = (y + 1) % 256
                x = (x + ls[y]) % 256
                ls[x], ls[y] = ls[y], ls[x]
                s.append(compat_ord(ch) ^ ls[(ls[x] + ls[y]) % 256])
            return bytes(s)

        # get sid, token
        encrypted = base64.b64decode(
            data['security']['encrypt_string'].encode('ascii'))
        sid, token = yk_t(b'becaf9be', encrypted).decode('ascii').split('_')

        # get oip
        oip = data['security']['ip']

        # The file id template of every stream type; characters 8-9 are
        # replaced by the segment number when a concrete file id is built.
        fileid_dict = {}
        for stream in data['stream']:
            fileid_dict[stream.get('stream_type')] = stream['stream_fileid']

        def get_fileid(stream_type, n):
            # Segment number as upper-case hex, zero-padded to two digits.
            number = '%02X' % int(n)
            streamfileids = fileid_dict[stream_type]
            return streamfileids[0:8] + number + streamfileids[10:]

        # get ep
        def generate_ep(fileid):
            ep_t = yk_t(
                b'bf7e5f01',
                ('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
            )
            return base64.b64encode(ep_t).decode('ascii')

        # generate video_urls
        video_urls_dict = {}
        for stream in data['stream']:
            stream_type = stream.get('stream_type')
            video_urls = []
            # enumerate() gives each segment its own position; list.index()
            # would return the first match for two equal segment dicts and
            # costs a linear scan per segment.
            for n, seg in enumerate(stream['segs']):
                fileid = get_fileid(stream_type, n)
                param = {
                    'K': seg['key'],
                    'hd': self.get_hd(stream_type),
                    'myp': 0,
                    'ypp': 0,
                    'ctype': 12,
                    'ev': 1,
                    'token': token,
                    'oip': oip,
                    'ep': generate_ep(fileid),
                }
                video_urls.append(
                    'http://k.youku.com/player/getFlvPath/sid/%s_00/st/%s/fileid/%s?%s' % (
                        sid, self.parse_ext_l(stream_type), fileid,
                        compat_urllib_parse.urlencode(param)))
            video_urls_dict[stream_type] = video_urls

        return video_urls_dict

    @staticmethod
    def get_ysuid():
        """Return a value for the ``__ysuid`` cookie.

        The value is the current Unix time in whole seconds followed by
        three random ASCII letters.
        """
        return '%d%s' % (int(time.time()), ''.join([
            random.choice(string.ascii_letters) for i in range(3)]))

    def get_hd(self, fm):
        """Return the ``hd`` query value for stream type ``fm``.

        Raises ``KeyError`` if ``fm`` is not a known stream type.
        """
        return self._HD_IDS[fm]

    def parse_ext_l(self, fm):
        """Return the container extension for stream type ``fm``.

        Raises ``KeyError`` if ``fm`` is not a known stream type.
        """
        return self._EXTENSIONS[fm]

    def get_format_name(self, fm):
        """Return the ``format_id`` for stream type ``fm``.

        Raises ``KeyError`` if ``fm`` is not a known stream type.
        """
        return self._FORMAT_NAMES[fm]

    def _real_extract(self, url):
        """Extract the video at ``url`` as a ``multi_video`` result.

        Downloads the play/get.json metadata (passing the ``videopassword``
        parameter when set), raises ``ExtractorError`` when the server
        reports an error or returns no streams, and otherwise returns one
        entry per segment, each listing the formats that provide it.
        """
        video_id = self._match_id(url)

        self._set_cookie('youku.com', '__ysuid', self.get_ysuid())

        def retrieve_data(req_url, note):
            headers = {
                'Referer': req_url,
            }
            self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
            req = sanitized_Request(req_url, headers=headers)

            cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
            if cn_verification_proxy:
                req.add_header('Ytdl-request-proxy', cn_verification_proxy)

            raw_data = self._download_json(req, video_id, note=note)

            return raw_data['data']

        video_password = self._downloader.params.get('videopassword', None)

        # request basic data
        basic_data_url = "http://play.youku.com/play/get.json?vid=%s&ct=12" % video_id
        if video_password:
            basic_data_url += '&pwd=%s' % video_password

        data = retrieve_data(basic_data_url, 'Downloading JSON metadata')

        error = data.get('error')
        if error:
            error_note = error.get('note')
            if error_note is not None and '因版权原因无法观看此视频' in error_note:
                raise ExtractorError(
                    'Youku said: Sorry, this video is available in China only', expected=True)
            # The code may be missing or non-numeric; formatting it with %i
            # would raise TypeError and hide the server's message.
            msg = 'Youku server reported error'
            error_code = error.get('code')
            if error_code is not None:
                msg += ' %s' % error_code
            if error_note is not None:
                msg += ': ' + error_note
            raise ExtractorError(msg)

        # get video title
        title = data['video']['title']

        streams = data['stream']
        if not streams:
            # max() over an empty sequence would raise a bare ValueError.
            raise ExtractorError('Youku returned no streams for this video')

        # generate video_urls_dict
        video_urls_dict = self.construct_video_urls(data)

        # construct info
        # some formats are not available for all parts, so create as many
        # entries as the longest stream has segments and let each stream
        # fill in the parts it provides
        part_count = max(len(stream['segs']) for stream in streams)
        entries = [{
            'id': '%s_part%d' % (video_id, i + 1),
            'title': title,
            'formats': [],
        } for i in range(part_count)]

        for stream in streams:
            fm = stream.get('stream_type')
            video_urls = video_urls_dict[fm]
            for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
                entry['formats'].append({
                    'url': video_url,
                    'format_id': self.get_format_name(fm),
                    'ext': self.parse_ext_l(fm),
                    'filesize': int(seg['size']),
                })

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': title,
            'entries': entries,
        }