]>
Commit | Line | Data |
---|---|---|
ddbd9035 PH |
1 | # coding: utf-8 |
2 | ||
9c286cfa PH |
3 | import json |
4 | import math | |
5 | import random | |
6 | import re | |
7 | import time | |
8 | ||
9 | from .common import InfoExtractor | |
10 | from ..utils import ( | |
11 | ExtractorError, | |
12 | ) | |
13 | ||
14 | ||
class YoukuIE(InfoExtractor):
    """Information extractor for youku.com videos.

    The extractor downloads the ``getPlayList`` JSON document for a video,
    descrambles the stream file id with the seed contained in that document
    and returns one info dictionary per video segment.
    """

    _VALID_URL = r'(?:(?:http://)?(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|youku:)(?P<ID>[A-Za-z0-9]+)(?:\.html|/v\.swf|)'
    _TEST = {
        u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
        u"file": u"XNDgyMDQ2NTQw_part00.flv",
        u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b",
        u"params": {u"test": False},
        u"info_dict": {
            u"title": u"youtube-dl test video \"'/\\ä↭𝕐"
        }
    }

    def _gen_sid(self):
        """Return a session id string.

        The id is the current time in milliseconds followed by two random
        four-digit numbers (1000-1998 and 1000-9999).
        """
        now_ms = int(time.time() * 1000)
        random1 = random.randint(1000, 1998)
        random2 = random.randint(1000, 9999)
        return "%d%d%d" % (now_ms, random1, random2)

    def _get_file_ID_mix_string(self, seed):
        """Return the character alphabet shuffled deterministically by *seed*.

        *seed* is converted with ``float()``; the result is a list of
        single-character strings containing every alphabet character once.
        """
        # The backslash is written as an explicit escape; the alphabet
        # itself (letters, "/", "\", ":", ".", "_", "-", digits) is unchanged.
        source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890")
        seed = float(seed)
        mixed = []
        for _ in range(len(source)):
            # Linear congruential step; seed stays within [0, 65536).
            seed = (seed * 211 + 30031) % 65536
            # Scale the seed to an index into the *remaining* characters.
            index = int(math.floor(seed / 65536 * len(source)))
            mixed.append(source.pop(index))
        return mixed

    def _get_file_id(self, fileId, seed):
        """Decode a ``*``-separated list of indices into the real file id.

        Each non-empty field of *fileId* is an integer index into the
        alphabet shuffled by *seed*; the selected characters are joined into
        a string.
        """
        mixed = self._get_file_ID_mix_string(seed)
        return ''.join(mixed[int(ch)] for ch in fileId.split('*') if ch)

    def _real_extract(self, url):
        """Extract the segment list for the video referenced by *url*.

        Returns a list of info dictionaries, one per segment.

        Raises ExtractorError if the URL does not match ``_VALID_URL``, if
        the server reports an error code, or if the playlist document does
        not have the expected structure.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('ID')

        info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id

        jsondata = self._download_webpage(info_url, video_id)

        self.report_extraction(video_id)

        # Looked up outside the try block so that the structural-error
        # handler below only covers the parsing of the server response.
        requested_format = self._downloader.params.get('format', None)

        try:
            config = json.loads(jsondata)
            data = config['data'][0]

            error_code = data.get('error_code')
            if error_code:
                # -8 means blocked outside China.
                error = data.get('error')  # Chinese and English, separated by newline.
                # %s rather than %i: works whether the code arrives as a
                # number or as a string.
                raise ExtractorError(error or u'Server reported error %s' % error_code,
                                     expected=True)

            video_title = data['title']
            seed = data['seed']

            stream_file_ids = data['streamfileids']

            if requested_format is None or requested_format == 'best':
                stream_format = 'hd2' if 'hd2' in stream_file_ids else 'flv'
                ext = u'flv'
            elif requested_format == 'worst':
                stream_format = 'mp4'
                ext = u'mp4'
            else:
                # Any other requested format falls back to flv.
                stream_format = 'flv'
                ext = u'flv'

            fileid = stream_file_ids[stream_format]
            # segs is usually a dictionary, but an empty *list* if an error
            # occurred; indexing that list with a string raises TypeError,
            # which is handled below together with the other shape errors.
            keys = [s['k'] for s in data['segs'][stream_format]]
        except (UnicodeDecodeError, ValueError, KeyError, IndexError,
                TypeError, AttributeError):
            raise ExtractorError(u'Unable to extract info section')

        sid = self._gen_sid()
        fileid = self._get_file_id(fileid, seed)

        files_info = []
        for index, key in enumerate(keys):
            # The two characters at index 8 and 9 of the file id are replaced
            # by the segment number as two uppercase hex digits.
            temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
            # NOTE(review): the path always contains "st/flv", even when the
            # mp4 stream was selected - confirm this is intended.
            download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)

            files_info.append({
                'id': '%s_part%02d' % (video_id, index),
                'url': download_url,
                'uploader': None,
                'upload_date': None,
                'title': video_title,
                'ext': ext,
            })

        return files_info