jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/youku.py
[youku] Cleanup and PEP8
[yt-dlp.git] / youtube_dl / extractor / youku.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import base64
5
6 from .common import InfoExtractor
7 from ..compat import (
8 compat_urllib_parse,
9 compat_ord,
10 )
11 from ..utils import (
12 ExtractorError,
13 sanitized_Request,
14 )
15
16
class YoukuIE(InfoExtractor):
    """Extractor for Youku videos.

    Accepts ``v.youku.com/v_show/id_<id>`` pages, ``player.youku.com``
    flash player URLs and the internal ``youku:<id>`` shorthand.  A video is
    returned as a ``multi_video`` result with one entry per segment.
    """

    IE_NAME = 'youku'
    IE_DESC = '优酷'
    _VALID_URL = r'''(?x)
        (?:
            http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
            youku:)
        (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
    '''

    _TESTS = [{
        'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
        'md5': '5f3af4192eabacc4501508d54a8cabd7',
        'info_dict': {
            'id': 'XMTc1ODE5Njcy_part1',
            'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
            'ext': 'flv'
        }
    }, {
        'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
        'only_matching': True,
    }, {
        'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
        'info_dict': {
            'id': 'XODgxNjg1Mzk2',
            'title': '武媚娘传奇 85',
        },
        'playlist_count': 11,
    }, {
        'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
        'info_dict': {
            'id': 'XMTI1OTczNDM5Mg',
            'title': '花千骨 04',
        },
        'playlist_count': 13,
        'skip': 'Available in China only',
    }, {
        'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
        'note': 'Video protected with password',
        'info_dict': {
            'id': 'XNjA1NzA2Njgw',
            'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
        },
        'playlist_count': 19,
        'params': {
            'videopassword': '100600',
        },
    }]

    def construct_video_urls(self, data):
        """Build the download URL of every segment of every stream.

        ``data`` is the ``data`` object of the ``play/get.json`` response.
        The keys read here are ``security.encrypt_string``, ``security.ip``
        and ``stream`` (a list of dicts with ``stream_type``,
        ``stream_fileid`` and ``segs``, each segment carrying a ``key``).

        Returns a dict mapping each stream type to the list of its segment
        URLs, in segment order.

        Raises KeyError when an expected key is missing or when a stream type
        is unknown to get_hd() / parse_ext_l().
        """
        def yk_t(key, payload):
            # RC4-style cipher: schedule the key into a 256-entry box, then
            # XOR the payload with the generated keystream.
            box = list(range(256))
            j = 0
            for i in range(256):
                j = (j + box[i] + compat_ord(key[i % len(key)])) % 256
                box[i], box[j] = box[j], box[i]
            out = bytearray()
            x, y = 0, 0
            for ch in payload:
                y = (y + 1) % 256
                x = (x + box[y]) % 256
                box[x], box[y] = box[y], box[x]
                out.append(compat_ord(ch) ^ box[(box[x] + box[y]) % 256])
            return bytes(out)

        # get sid, token
        encrypted = base64.b64decode(
            data['security']['encrypt_string'].encode('ascii'))
        sid, token = yk_t(b'becaf9be', encrypted).decode('ascii').split('_')

        # get oip
        oip = data['security']['ip']

        fileid_dict = {}
        for stream in data['stream']:
            fileid_dict[stream.get('stream_type')] = stream['stream_fileid']

        def get_fileid(stream_type, n):
            # Characters 8-9 of the stream file id hold the segment number as
            # (at least) two upper-case hex digits.
            template = fileid_dict[stream_type]
            return template[0:8] + '%02X' % int(n) + template[10:]

        # get ep
        def generate_ep(fileid):
            ep_t = yk_t(
                b'bf7e5f01',
                ('%s_%s_%s' % (sid, fileid, token)).encode('ascii'))
            return base64.b64encode(ep_t).decode('ascii')

        # generate video_urls
        video_urls_dict = {}
        for stream in data['stream']:
            stream_type = stream.get('stream_type')
            video_urls = []
            # enumerate() gives the real position of each segment; looking it
            # up with list.index() is quadratic and returns the first match
            # when two segment dicts compare equal.
            for n, seg in enumerate(stream['segs']):
                fileid = get_fileid(stream_type, n)
                param = {
                    'K': seg['key'],
                    'hd': self.get_hd(stream_type),
                    'myp': 0,
                    'ypp': 0,
                    'ctype': 12,
                    'ev': 1,
                    'token': token,
                    'oip': oip,
                    'ep': generate_ep(fileid)
                }
                video_url = \
                    'http://k.youku.com/player/getFlvPath/' + \
                    'sid/' + sid + \
                    '_00' + \
                    '/st/' + self.parse_ext_l(stream_type) + \
                    '/fileid/' + fileid + '?' + \
                    compat_urllib_parse.urlencode(param)
                video_urls.append(video_url)
            video_urls_dict[stream_type] = video_urls

        return video_urls_dict

    def get_hd(self, fm):
        """Return the ``hd`` query parameter value for stream type ``fm``.

        Raises KeyError for an unknown stream type.
        """
        hd_id_dict = {
            'flv': '0',
            'mp4': '1',
            'hd2': '2',
            'hd3': '3',
            '3gp': '0',
            '3gphd': '1',
            'flvhd': '0',
            'mp4hd': '1',
            'mp4hd2': '1',
            # Present in parse_ext_l() and get_format_name() as well; without
            # it a 'mp4hd3' stream aborts URL construction with a KeyError.
            'mp4hd3': '1',
        }
        return hd_id_dict[fm]

    def parse_ext_l(self, fm):
        """Return the container extension for stream type ``fm``.

        The value is used both in the ``/st/`` part of the segment URL and as
        the ``ext`` of the resulting format.  Raises KeyError for an unknown
        stream type.
        """
        ext_dict = {
            'flv': 'flv',
            'mp4': 'mp4',
            'mp4hd': 'mp4',
            'mp4hd2': 'flv',
            'mp4hd3': 'flv',
            'hd2': 'flv',
            'hd3': 'flv',
            '3gp': 'flv',
            '3gphd': 'mp4',
            'flvhd': 'flv'
        }
        return ext_dict[fm]

    def get_format_name(self, fm):
        """Return the ``format_id`` reported for stream type ``fm``.

        Raises KeyError for an unknown stream type.
        """
        _dict = {
            '3gp': 'h6',
            '3gphd': 'h5',
            'flvhd': 'h4',
            'flv': 'h4',
            'mp4': 'h3',
            'hd2': 'h2',
            'hd3': 'h1',
            'mp4hd': 'h3',
            'mp4hd3': 'h4',
            'mp4hd2': 'h4'
        }
        return _dict[fm]

    def _real_extract(self, url):
        """Extract a Youku video as a ``multi_video`` result.

        Downloads the JSON metadata for the video id found in ``url``
        (sending the ``videopassword`` option as ``pwd`` when it is set),
        builds the segment URLs and returns one entry per segment, each
        listing the formats available for that segment.

        Raises ExtractorError when the server response contains an error.
        """
        video_id = self._match_id(url)

        def retrieve_data(req_url, note):
            headers = {
                'Referer': req_url,
            }
            self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
            req = sanitized_Request(req_url, headers=headers)

            cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
            if cn_verification_proxy:
                req.add_header('Ytdl-request-proxy', cn_verification_proxy)

            raw_data = self._download_json(req, video_id, note=note)

            return raw_data['data']

        video_password = self._downloader.params.get('videopassword', None)

        # request basic data
        basic_data_url = "http://play.youku.com/play/get.json?vid=%s&ct=12" % video_id
        if video_password:
            basic_data_url += '&pwd=%s' % video_password

        data = retrieve_data(
            basic_data_url,
            'Downloading JSON metadata 1')

        error = data.get('error')
        if error:
            error_note = error.get('note')
            if error_note is not None and '因版权原因无法观看此视频' in error_note:
                raise ExtractorError(
                    'Youku said: Sorry, this video is available in China only', expected=True)
            # Both fields are optional: only add what the server sent, so a
            # missing code or note cannot turn the report into a TypeError.
            msg = 'Youku server reported error'
            error_code = error.get('code')
            if error_code is not None:
                msg += ' %s' % error_code
            if error_note is not None:
                msg += ': ' + error_note
            raise ExtractorError(msg)

        # get video title
        title = data['video']['title']

        # generate video_urls_dict
        video_urls_dict = self.construct_video_urls(data)

        # construct info
        # some formats are not available for all parts, so create as many
        # entries as the longest stream has segments
        part_count = max(len(stream['segs']) for stream in data['stream'])
        entries = [{
            'id': '%s_part%d' % (video_id, i + 1),
            'title': title,
            'formats': [],
        } for i in range(part_count)]

        for stream in data['stream']:
            fm = stream.get('stream_type')
            video_urls = video_urls_dict[fm]
            # zip() stops at the shortest input, so a stream with fewer
            # segments only contributes formats to the leading entries.
            for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
                entry['formats'].append({
                    'url': video_url,
                    'format_id': self.get_format_name(fm),
                    'ext': self.parse_ext_l(fm),
                    'filesize': int(seg['size']),
                })

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': title,
            'entries': entries,
        }