]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youku.py
Merge pull request #8061 from dstftw/introduce-chapter-and-series-fields
[yt-dlp.git] / youtube_dl / extractor / youku.py
CommitLineData
ddbd9035 1# coding: utf-8
8a32b82e
PH
2from __future__ import unicode_literals
3
f9355dc9 4import base64
034caf70
YCH
5import random
6import string
7import time
9c286cfa
PH
8
9from .common import InfoExtractor
c203be3f
YCH
10from ..compat import (
11 compat_urllib_parse,
12 compat_ord,
5c2266df
S
13)
14from ..utils import (
15 ExtractorError,
16 sanitized_Request,
c203be3f 17)
1498940b 18
aed473cc 19
class YoukuIE(InfoExtractor):
    """Extractor for youku.com videos.

    A video is returned as a ``multi_video`` result: one entry per segment
    ("part"), each entry listing the formats in which that segment is
    offered.
    """

    IE_NAME = 'youku'
    IE_DESC = '优酷'
    _VALID_URL = r'''(?x)
        (?:
            http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
            youku:)
        (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
    '''

    _TESTS = [{
        # MD5 is unstable
        'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
        'info_dict': {
            'id': 'XMTc1ODE5Njcy_part1',
            'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
            'ext': 'flv'
        }
    }, {
        'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
        'only_matching': True,
    }, {
        'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
        'info_dict': {
            'id': 'XODgxNjg1Mzk2',
            'title': '武媚娘传奇 85',
        },
        'playlist_count': 11,
        'skip': 'Available in China only',
    }, {
        'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
        'info_dict': {
            'id': 'XMTI1OTczNDM5Mg',
            'title': '花千骨 04',
        },
        'playlist_count': 13,
    }, {
        'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
        'note': 'Video protected with password',
        'info_dict': {
            'id': 'XNjA1NzA2Njgw',
            'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
        },
        'playlist_count': 19,
        'params': {
            'videopassword': '100600',
        },
    }]

    # Lookup tables keyed by the ``stream_type`` value found in the JSON
    # metadata.  Kept at class level so they are built once rather than on
    # every get_hd()/parse_ext_l()/get_format_name() call.
    _HD_IDS = {
        '3gp': '0',
        '3gphd': '1',
        'flv': '0',
        'flvhd': '0',
        'mp4': '1',
        'mp4hd': '1',
        'mp4hd2': '1',
        'mp4hd3': '1',
        'hd2': '2',
        'hd3': '3',
    }
    _EXTENSIONS = {
        '3gp': 'flv',
        '3gphd': 'mp4',
        'flv': 'flv',
        'flvhd': 'flv',
        'mp4': 'mp4',
        'mp4hd': 'mp4',
        'mp4hd2': 'flv',
        'mp4hd3': 'flv',
        'hd2': 'flv',
        'hd3': 'flv',
    }
    _FORMAT_NAMES = {
        '3gp': 'h6',
        '3gphd': 'h5',
        'flv': 'h4',
        'flvhd': 'h4',
        'mp4': 'h3',
        'mp4hd': 'h3',
        'mp4hd2': 'h4',
        'mp4hd3': 'h4',
        'hd2': 'h2',
        'hd3': 'h1',
    }

    @staticmethod
    def _yk_t(key, payload):
        """XOR ``payload`` with a keystream derived from ``key``.

        Both arguments are byte strings; the result is a byte string of the
        same length as ``payload``.  The key schedule and keystream loop
        have the structure of RC4, so applying the function twice with the
        same key restores the input.
        """
        state = list(range(256))
        j = 0
        # Key scheduling: permute the 256-entry state according to the key.
        for i in range(256):
            j = (j + state[i] + compat_ord(key[i % len(key)])) % 256
            state[i], state[j] = state[j], state[i]
        out = bytearray()
        x, y = 0, 0
        # Keystream generation: one keystream byte per payload byte.
        for ch in payload:
            y = (y + 1) % 256
            x = (x + state[y]) % 256
            state[x], state[y] = state[y], state[x]
            out.append(compat_ord(ch) ^ state[(state[x] + state[y]) % 256])
        return bytes(out)

    @staticmethod
    def _get_fileid(stream_fileid, n):
        """Return the file id of segment ``n`` of a stream.

        Characters 8-9 of the stream-level file id are replaced by the
        segment index written as upper-case hexadecimal, zero-padded to at
        least two digits.
        """
        return stream_fileid[0:8] + '%02X' % n + stream_fileid[10:]

    def construct_video_urls(self, data):
        """Build the download URLs for every segment of every stream.

        ``data`` is the ``data`` object of the play/get.json response.  The
        keys read here are ``security.encrypt_string``, ``security.ip`` and,
        for each item of ``stream``, ``stream_type``, ``stream_fileid`` and
        ``segs`` (each segment providing ``key``).

        Returns a dict mapping each ``stream_type`` to the list of segment
        URLs of that stream, in segment order.

        Raises KeyError if an expected key is missing or a ``stream_type``
        is not present in the lookup tables.
        """
        security = data['security']

        # get sid, token
        sid, token = self._yk_t(
            b'becaf9be',
            base64.b64decode(security['encrypt_string'].encode('ascii'))
        ).decode('ascii').split('_')

        # get oip
        oip = security['ip']

        streams = data['stream']

        # Collected up front so that, should two streams share a
        # stream_type, the last one's file id is used for both.
        fileid_dict = {}
        for stream in streams:
            fileid_dict[stream.get('stream_type')] = stream['stream_fileid']

        # generate video_urls
        video_urls_dict = {}
        for stream in streams:
            stream_type = stream.get('stream_type')
            video_urls = []
            # enumerate() gives the segment position directly; looking it up
            # with list.index() is quadratic and returns the first match
            # when two segment dicts compare equal.
            for n, seg in enumerate(stream['segs']):
                seg_key = seg['key']
                hd = self.get_hd(stream_type)
                fileid = self._get_fileid(fileid_dict[stream_type], n)
                # get ep
                ep = base64.b64encode(self._yk_t(
                    b'bf7e5f01',
                    ('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
                )).decode('ascii')
                param = {
                    'K': seg_key,
                    'hd': hd,
                    'myp': 0,
                    'ypp': 0,
                    'ctype': 12,
                    'ev': 1,
                    'token': token,
                    'oip': oip,
                    'ep': ep,
                }
                video_url = (
                    'http://k.youku.com/player/getFlvPath/'
                    'sid/' + sid +
                    '_00'
                    '/st/' + self.parse_ext_l(stream_type) +
                    '/fileid/' + fileid + '?' +
                    compat_urllib_parse.urlencode(param))
                video_urls.append(video_url)
            video_urls_dict[stream_type] = video_urls

        return video_urls_dict

    @staticmethod
    def get_ysuid():
        """Return a value for the ``__ysuid`` cookie.

        The value is the current Unix time in whole seconds followed by
        three random ASCII letters.
        """
        suffix = ''.join(
            random.choice(string.ascii_letters) for _ in range(3))
        return '%d%s' % (int(time.time()), suffix)

    def get_hd(self, fm):
        """Return the ``hd`` query value for stream type ``fm``.

        Raises KeyError for an unknown stream type.
        """
        return self._HD_IDS[fm]

    def parse_ext_l(self, fm):
        """Return the file extension used for stream type ``fm``.

        Raises KeyError for an unknown stream type.
        """
        return self._EXTENSIONS[fm]

    def get_format_name(self, fm):
        """Return the ``format_id`` reported for stream type ``fm``.

        Raises KeyError for an unknown stream type.
        """
        return self._FORMAT_NAMES[fm]

    def _real_extract(self, url):
        """Extract the video at ``url`` as a ``multi_video`` info dict.

        Raises ExtractorError when the JSON metadata carries an ``error``
        object.
        """
        video_id = self._match_id(url)

        self._set_cookie('youku.com', '__ysuid', self.get_ysuid())

        def retrieve_data(req_url, note):
            # Fetch req_url as JSON and return its 'data' member.
            headers = {
                'Referer': req_url,
            }
            self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
            req = sanitized_Request(req_url, headers=headers)

            cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
            if cn_verification_proxy:
                req.add_header('Ytdl-request-proxy', cn_verification_proxy)

            raw_data = self._download_json(req, video_id, note=note)

            return raw_data['data']

        video_password = self._downloader.params.get('videopassword', None)

        # request basic data
        basic_data_url = "http://play.youku.com/play/get.json?vid=%s&ct=12" % video_id
        if video_password:
            basic_data_url += '&pwd=%s' % video_password

        data = retrieve_data(basic_data_url, 'Downloading JSON metadata')

        error = data.get('error')
        if error:
            error_note = error.get('note')
            if error_note is not None and '因版权原因无法观看此视频' in error_note:
                raise ExtractorError(
                    'Youku said: Sorry, this video is available in China only', expected=True)

            # The code may be absent or non-numeric; formatting it with %i
            # would then raise TypeError and hide the server's message.
            msg = 'Youku server reported error'
            error_code = error.get('code')
            if error_code is not None:
                msg += ' %s' % error_code
            if error_note is not None:
                msg += ': ' + error_note
            raise ExtractorError(msg)

        # get video title
        title = data['video']['title']

        # generate video_urls_dict
        video_urls_dict = self.construct_video_urls(data)

        # construct info
        # some formats are not available for all parts, so create as many
        # entries as the longest stream has segments
        part_count = max(len(v.get('segs')) for v in data['stream'])
        entries = [{
            'id': '%s_part%d' % (video_id, part + 1),
            'title': title,
            'formats': [],
        } for part in range(part_count)]

        for stream in data['stream']:
            fm = stream.get('stream_type')
            video_urls = video_urls_dict[fm]
            # zip() stops at the shortest input, so a stream with fewer
            # segments only contributes formats to the leading entries.
            for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
                entry['formats'].append({
                    'url': video_url,
                    'format_id': self.get_format_name(fm),
                    'ext': self.parse_ext_l(fm),
                    'filesize': int(seg['size']),
                })

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': title,
            'entries': entries,
        }