]>
Commit | Line | Data |
---|---|---|
c8434e83 | 1 | # encoding: utf-8 |
38a40276 | 2 | from __future__ import unicode_literals |
3 | ||
c8434e83 | 4 | import re, base64, zlib |
5 | from hashlib import sha1 | |
6 | from math import pow, sqrt, floor | |
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
9 | ExtractorError, | |
10 | compat_urllib_parse, | |
11 | compat_urllib_request, | |
12 | bytes_to_intlist, | |
13 | intlist_to_bytes, | |
14 | unified_strdate, | |
15 | clean_html, | |
16 | ) | |
17 | from ..aes import ( | |
18 | aes_cbc_decrypt, | |
19 | inc, | |
20 | ) | |
21 | ||
class CrunchyrollIE(InfoExtractor):
    """Extractor for crunchyroll.com video pages.

    Handles both the desktop (``www``) and mobile (``m``) hosts matched by
    ``_VALID_URL``.  For each quality advertised on the page the stream
    host and play path are requested from the site's XML endpoint, and
    every subtitle track linked from the page is downloaded, decrypted and
    converted to SRT text.
    """

    _VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
    _TESTS = [{
        'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
        'file': '645513.flv',
        #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
        'info_dict': {
            'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
            'description': 'md5:2d17137920c64f2f49981a7797d275ef',
            'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
            'uploader': 'Yomiuri Telecasting Corporation (YTV)',
            'upload_date': '20131013',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
    }]

    # Quality label found on the web page (the digits in "?p<digits>=1")
    # -> (video_encode_quality, video_format) values sent to the stream
    # info request in _real_extract.
    _FORMAT_IDS = {
        '360': ('60', '106'),
        '480': ('61', '106'),
        '720': ('62', '106'),
        '1080': ('80', '108'),
    }

    def _decrypt_subtitles(self, data, iv, id):
        """Decrypt and decompress one subtitle payload.

        data -- encrypted payload as a byte string
        iv   -- AES-CBC initialisation vector as a byte string
        id   -- subtitle id (anything accepted by int()); the AES key is
                derived from it

        Returns the zlib-decompressed plaintext as a byte string.
        """
        data = bytes_to_intlist(data)
        iv = bytes_to_intlist(iv)
        id = int(id)

        def obfuscate_key_aux(count, modulo, start):
            # Extend the two start values with `count` further terms, each
            # the sum of the previous two.
            output = list(start)
            for _ in range(count):
                output.append(output[-1] + output[-2])
            # cut off start values
            output = output[2:]
            # Reduce each term modulo `modulo` and shift it by 33.
            return [x % modulo + 33 for x in output]

        def obfuscate_key(key):
            # Mix the numeric id with a fixed constant, then hash the result
            # together with a fixed generated prefix.
            num1 = int(floor(pow(2, 25) * sqrt(6.9)))
            num2 = (num1 ^ key) << 5
            num3 = key ^ num1
            num4 = num3 ^ (num3 >> 3) ^ num2
            prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
            sha_hash = bytes_to_intlist(
                sha1(prefix + str(num4).encode('ascii')).digest())
            # Extend 160 Bit hash to 256 Bit
            return sha_hash + [0] * 12

        key = obfuscate_key(id)
        decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
        return zlib.decompress(decrypted_data)

    def _convert_subtitles_to_srt(self, subtitles):
        """Convert a decrypted subtitle script to SRT-formatted text.

        subtitles -- the subtitle XML as text; every ``<event>`` element
                     with ``start``, ``end`` and ``text`` attributes (in
                     that order) yields one entry.

        Returns the concatenated entries as one string.  Events whose text
        is empty after HTML cleaning are skipped and do not consume a
        sequence number.
        """
        entries = []
        for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
            # SRT uses a comma as the decimal separator in timestamps.
            start = start.replace('.', ',')
            end = end.replace('.', ',')
            text = clean_html(text)
            # "\N" is the line break marker used inside the event text.
            text = text.replace('\\N', '\n')
            if not text:
                continue
            entries.append(
                '%d\n%s --> %s\n%s\n\n' % (len(entries) + 1, start, end, text))
        return ''.join(entries)

    def _real_extract(self, url):
        """Extract metadata, formats and subtitles for a video URL.

        Returns an info dict with the keys 'id', 'title', 'description',
        'thumbnail', 'uploader', 'upload_date', 'subtitles' (language code
        -> SRT text) and 'formats'.

        Raises ExtractorError when the page carries a trailer notice.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        if mobj.group('prefix') == 'm':
            # The mobile page only links to the full page; follow its
            # canonical URL.
            mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
            webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
        else:
            webpage_url = 'http://www.' + mobj.group('url')

        webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
        note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='')
        if note_m:
            raise ExtractorError(note_m)

        video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
        # Collapse runs of spaces in the title.
        video_title = re.sub(r' {2,}', ' ', video_title)
        # A missing description is reported as None rather than ''.
        video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') or None
        # The "Availability for free users" date is used as the upload date.
        video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
        if video_upload_date:
            video_upload_date = unified_strdate(video_upload_date)
        video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)

        playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
        playerdata_req = compat_urllib_request.Request(playerdata_url)
        # POST bodies must be bytes: Python 3's urllib rejects text strings.
        playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}).encode('utf-8')
        playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')

        stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
        video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)

        formats = []
        for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
            format_ids = self._FORMAT_IDS.get(fmt)
            if format_ids is None:
                # Quality not present in _FORMAT_IDS: skip it instead of
                # aborting the whole extraction with a KeyError.
                continue
            stream_quality, stream_format = format_ids
            video_format = fmt + 'p'
            streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
            # urlencode doesn't work!
            streamdata_query = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality + '&media%5Fid=' + stream_id + '&video%5Fformat=' + stream_format
            # Encode to bytes (required by Python 3's urllib) so that
            # Content-Length below counts bytes, not characters.
            streamdata_req.data = streamdata_query.encode('utf-8')
            streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
            streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
            streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for ' + video_format)
            video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url')
            video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path')
            formats.append({
                'url': video_url,
                'play_path': video_play_path,
                'ext': 'flv',
                'format': video_format,
                'format_id': video_format,
            })

        subtitles = {}
        for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
            sub_page = self._download_webpage(
                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
                video_id, note='Downloading subtitles for ' + sub_name)
            sub_key_id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
            sub_iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
            sub_data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
            # Best effort: a track missing any required field is skipped.
            if not sub_key_id or not sub_iv or not sub_data:
                continue
            sub_key_id = int(sub_key_id)
            sub_iv = base64.b64decode(sub_iv)
            sub_data = base64.b64decode(sub_data)

            subtitle = self._decrypt_subtitles(sub_data, sub_iv, sub_key_id).decode('utf-8')
            lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False)
            if not lang_code:
                continue
            subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)

        return {
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'thumbnail': video_thumbnail,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'subtitles': subtitles,
            'formats': formats,
        }