]>
Commit | Line | Data |
---|---|---|
ddbd9035 | 1 | # coding: utf-8 |
8a32b82e PH |
2 | from __future__ import unicode_literals |
3 | ||
f9355dc9 | 4 | import base64 |
411c590a | 5 | import itertools |
034caf70 | 6 | import random |
411c590a | 7 | import re |
034caf70 YCH |
8 | import string |
9 | import time | |
9c286cfa PH |
10 | |
11 | from .common import InfoExtractor | |
c203be3f | 12 | from ..compat import ( |
15707c7e | 13 | compat_urllib_parse_urlencode, |
c203be3f | 14 | compat_ord, |
5c2266df S |
15 | ) |
16 | from ..utils import ( | |
17 | ExtractorError, | |
411c590a | 18 | get_element_by_attribute, |
c203be3f | 19 | ) |
1498940b | 20 | |
aed473cc | 21 | |
class YoukuIE(InfoExtractor):
    """Extractor for single Youku videos.

    A video is fetched as a series of segments ("parts"); the result is a
    'multi_video' whose entries are those parts, each carrying one format per
    available stream type.
    """
    IE_NAME = 'youku'
    IE_DESC = '优酷'
    _VALID_URL = r'''(?x)
        (?:
            https?://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
            youku:)
        (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
    '''

    _TESTS = [{
        # MD5 is unstable
        'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
        'info_dict': {
            'id': 'XMTc1ODE5Njcy_part1',
            'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
            'ext': 'flv'
        }
    }, {
        'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
        'only_matching': True,
    }, {
        'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
        'info_dict': {
            'id': 'XODgxNjg1Mzk2',
            'title': '武媚娘传奇 85',
        },
        'playlist_count': 11,
        'skip': 'Available in China only',
    }, {
        'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
        'info_dict': {
            'id': 'XMTI1OTczNDM5Mg',
            'title': '花千骨 04',
        },
        'playlist_count': 13,
    }, {
        'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
        'note': 'Video protected with password',
        'info_dict': {
            'id': 'XNjA1NzA2Njgw',
            'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
        },
        'playlist_count': 19,
        'params': {
            'videopassword': '100600',
        },
    }, {
        # /play/get.json contains streams with "channel_type":"tail"
        'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html',
        'info_dict': {
            'id': 'XOTUxMzg4NDMy',
            'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft',
        },
        'playlist_count': 6,
    }]

    # Lookup tables keyed by the 'stream_type' reported for each stream.
    # They back get_hd(), parse_ext_l() and get_format_name() respectively.
    _HD_IDS = {
        '3gp': '0',
        '3gphd': '1',
        'flv': '0',
        'flvhd': '0',
        'mp4': '1',
        'mp4hd': '1',
        'mp4hd2': '1',
        'mp4hd3': '1',
        'hd2': '2',
        'hd3': '3',
    }
    _EXTENSIONS = {
        '3gp': 'flv',
        '3gphd': 'mp4',
        'flv': 'flv',
        'flvhd': 'flv',
        'mp4': 'mp4',
        'mp4hd': 'mp4',
        'mp4hd2': 'flv',
        'mp4hd3': 'flv',
        'hd2': 'flv',
        'hd3': 'flv',
    }
    _FORMAT_NAMES = {
        '3gp': 'h6',
        '3gphd': 'h5',
        'flv': 'h4',
        'flvhd': 'h4',
        'mp4': 'h3',
        'mp4hd': 'h3',
        'mp4hd2': 'h4',
        'mp4hd3': 'h4',
        'hd2': 'h2',
        'hd3': 'h1',
    }

    @staticmethod
    def _rc4(key, data):
        """Apply the RC4 stream cipher to `data` using `key`.

        Both arguments are byte strings; the result is returned as bytes.
        The same call both encrypts and decrypts.
        """
        # Key scheduling: permute the state table according to the key
        state = list(range(256))
        j = 0
        for i in range(256):
            j = (j + state[i] + compat_ord(key[i % len(key)])) % 256
            state[i], state[j] = state[j], state[i]
        # Keystream generation, XORed onto the input one byte at a time
        out = bytearray()
        x, y = 0, 0
        for char in data:
            y = (y + 1) % 256
            x = (x + state[y]) % 256
            state[x], state[y] = state[y], state[x]
            out.append(compat_ord(char) ^ state[(state[x] + state[y]) % 256])
        return bytes(out)

    def construct_video_urls(self, data):
        """Build the download URL of every segment of every playable stream.

        `data` is the 'data' object of the /play/get.json response. This
        method reads data['security'] ('encrypt_string' and 'ip') and
        data['stream'] ('stream_type', 'stream_fileid', 'segs' and the
        optional 'channel_type').

        Returns a dict mapping each stream_type to the list of its segment
        URLs, in segment order. Streams whose channel_type is 'tail' are
        skipped. Raises KeyError for a stream_type missing from the lookup
        tables or for a missing mandatory field.
        """
        security = data['security']

        # The decrypted security string has the form "<sid>_<token>"
        sid, token = self._rc4(
            b'becaf9be', base64.b64decode(security['encrypt_string'].encode('ascii'))
        ).decode('ascii').split('_')

        oip = security['ip']

        video_urls_dict = {}
        for stream in data['stream']:
            if stream.get('channel_type') == 'tail':
                continue
            stream_type = stream.get('stream_type')
            stream_fileid = stream['stream_fileid']

            video_urls = []
            # enumerate() gives the segment number directly; looking it up
            # with list.index() is quadratic and returns the first match
            # when two segment dicts compare equal.
            for seg_num, seg in enumerate(stream['segs']):
                # Characters 8-9 of the stream file id are replaced by the
                # segment number as (at least) two upper-case hex digits
                fileid = stream_fileid[0:8] + '%02X' % seg_num + stream_fileid[10:]
                ep = base64.b64encode(self._rc4(
                    b'bf7e5f01',
                    ('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
                )).decode('ascii')
                param = {
                    'K': seg['key'],
                    'hd': self.get_hd(stream_type),
                    'myp': 0,
                    'ypp': 0,
                    'ctype': 12,
                    'ev': 1,
                    'token': token,
                    'oip': oip,
                    'ep': ep,
                }
                video_urls.append(
                    'http://k.youku.com/player/getFlvPath/sid/%s_00/st/%s/fileid/%s?%s' % (
                        sid, self.parse_ext_l(stream_type), fileid,
                        compat_urllib_parse_urlencode(param)))
            video_urls_dict[stream_type] = video_urls

        return video_urls_dict

    @staticmethod
    def get_ysuid():
        """Return a value for the '__ysuid' cookie: the current Unix time
        (whole seconds) followed by three random ASCII letters."""
        return '%d%s' % (int(time.time()), ''.join([
            random.choice(string.ascii_letters) for i in range(3)]))

    def get_hd(self, fm):
        """Return the 'hd' URL parameter for stream type `fm`.

        Raises KeyError for an unknown stream type.
        """
        return self._HD_IDS[fm]

    def parse_ext_l(self, fm):
        """Return the container extension ('flv' or 'mp4') for stream type `fm`.

        Raises KeyError for an unknown stream type.
        """
        return self._EXTENSIONS[fm]

    def get_format_name(self, fm):
        """Return the format_id ('h1' .. 'h6') used for stream type `fm`.

        Raises KeyError for an unknown stream type.
        """
        return self._FORMAT_NAMES[fm]

    def _real_extract(self, url):
        """Extract the video at `url` as a 'multi_video' result.

        Downloads the /play/get.json metadata (sending the 'videopassword'
        option as 'pwd' when it is set), turns server-side errors into
        ExtractorError, and returns one entry per video part with one format
        per stream that provides that part.
        """
        video_id = self._match_id(url)

        self._set_cookie('youku.com', '__ysuid', self.get_ysuid())

        def retrieve_data(req_url, note):
            """Download `req_url` as JSON and return its 'data' member."""
            headers = {
                'Referer': req_url,
            }
            headers.update(self.geo_verification_headers())
            self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')

            raw_data = self._download_json(req_url, video_id, note=note, headers=headers)

            return raw_data['data']

        video_password = self._downloader.params.get('videopassword')

        # request basic data
        basic_data_url = 'http://play.youku.com/play/get.json?vid=%s&ct=12' % video_id
        if video_password:
            # URL-encode the password so that characters such as '&', '#'
            # or spaces cannot corrupt the query string
            basic_data_url += '&' + compat_urllib_parse_urlencode({'pwd': video_password})

        data = retrieve_data(basic_data_url, 'Downloading JSON metadata')

        error = data.get('error')
        if error:
            error_note = error.get('note')
            if error_note is not None and '因版权原因无法观看此视频' in error_note:
                raise ExtractorError(
                    'Youku said: Sorry, this video is available in China only', expected=True)
            elif error_note and '该视频被设为私密' in error_note:
                raise ExtractorError(
                    'Youku said: Sorry, this video is private', expected=True)
            else:
                # Format the code with %s and only when present: '%i' raises
                # TypeError on a missing or non-integer code and would hide
                # the error actually reported by the server.
                msg = 'Youku server reported error'
                error_code = error.get('code')
                if error_code is not None:
                    msg += ' %s' % error_code
                if error_note is not None:
                    msg += ': ' + error_note
                raise ExtractorError(msg)

        # get video title
        title = data['video']['title']

        # generate video_urls_dict
        video_urls_dict = self.construct_video_urls(data)

        # Only streams that construct_video_urls() produced URLs for count;
        # 'tail' streams are skipped there and must not add empty parts here.
        streams = [
            stream for stream in data['stream']
            if stream.get('channel_type') != 'tail']
        if not streams:
            raise ExtractorError('Youku returned no playable streams')

        # construct info
        # some formats are not available for all parts, so create as many
        # entries as the longest stream has segments; zip() below then adds
        # each format only to the parts that stream actually provides
        part_count = max(len(stream['segs']) for stream in streams)
        entries = [{
            'id': '%s_part%d' % (video_id, part_num + 1),
            'title': title,
            'formats': [],
        } for part_num in range(part_count)]

        for stream in streams:
            fm = stream.get('stream_type')
            video_urls = video_urls_dict[fm]
            for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
                entry['formats'].append({
                    'url': video_url,
                    'format_id': self.get_format_name(fm),
                    'ext': self.parse_ext_l(fm),
                    'filesize': int(seg['size']),
                    'width': stream.get('width'),
                    'height': stream.get('height'),
                })

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': title,
            'entries': entries,
        }
411c590a YCH |
286 | |
287 | ||
class YoukuShowIE(InfoExtractor):
    """Playlist extractor for Youku show pages.

    Collects the video links found on the show page itself and on the
    paginated 'show_episode' pages, and returns them as one playlist.
    """
    IE_NAME = 'youku:show'
    _VALID_URL = r'https?://(?:www\.)?youku\.com/show_page/id_(?P<id>[0-9a-z]+)\.html'

    # Number of videos that makes an episodes page count as full; a page with
    # fewer videos ends the pagination in _real_extract().
    _PAGE_SIZE = 40

    _TEST = {
        'url': 'http://www.youku.com/show_page/id_zc7c670be07ff11e48b3f.html',
        'info_dict': {
            'id': 'zc7c670be07ff11e48b3f',
            'title': '花千骨 未删减版',
            'description': 'md5:578d4f2145ae3f9128d9d4d863312910',
        },
        'playlist_count': 50,
    }

    def _find_videos_in_page(self, webpage):
        """Return a url_result for every v.youku.com link listed in `webpage`.

        Only `<li><a href=... title=...>` items are considered; results keep
        the order in which the links appear in the page.
        """
        results = []
        for mobj in re.finditer(
                r'<li><a[^>]+href="(?P<url>https?://v\.youku\.com/[^"]+)"[^>]+title="(?P<title>[^"]+)"',
                webpage):
            # NOTE(review): the link title is handed over as the third
            # positional argument of url_result() - confirm against its
            # signature that this is the intended parameter.
            results.append(self.url_result(
                mobj.group('url'), YoukuIE.ie_key(), mobj.group('title')))
        return results

    def _real_extract(self, url):
        """Extract the show at `url` as a playlist of Youku video results.

        The playlist title and description are scraped from the show page and
        are optional (a failed match is not fatal).
        """
        show_id = self._match_id(url)
        show_page = self._download_webpage(url, show_id)

        # Videos listed directly on the show page come first
        entries = self._find_videos_in_page(show_page)

        playlist_title = self._html_search_regex(
            r'<span[^>]+class="name">([^<]+)</span>', show_page, 'playlist title', fatal=False)
        # Fall back to an empty string so the description search still runs
        # when the page has no element with class "detail"
        detail_html = get_element_by_attribute('class', 'detail', show_page) or ''
        playlist_description = self._html_search_regex(
            r'<span[^>]+style="display:none"[^>]*>([^<]+)</span>',
            detail_html, 'playlist description', fatal=False)

        # Walk the episode pages until one comes back less than full
        page_num = 1
        while True:
            page_html = self._download_webpage(
                'http://www.youku.com/show_episode/id_%s.html' % show_id,
                show_id, query={'divid': 'reload_%d' % (page_num * self._PAGE_SIZE + 1)},
                note='Downloading episodes page %d' % page_num)
            page_entries = self._find_videos_in_page(page_html)
            entries.extend(page_entries)
            if len(page_entries) < self._PAGE_SIZE:
                break
            page_num += 1

        return self.playlist_result(entries, show_id, playlist_title, playlist_description)