]>
Commit | Line | Data |
---|---|---|
c8434e83 | 1 | # encoding: utf-8 |
38a40276 | 2 | from __future__ import unicode_literals |
3 | ||
34440095 | 4 | import re |
1d430674 | 5 | import json |
34440095 S |
6 | import base64 |
7 | import zlib | |
78272a07 | 8 | import xml.etree.ElementTree |
34440095 | 9 | |
c8434e83 | 10 | from hashlib import sha1 |
11 | from math import pow, sqrt, floor | |
11b3ce85 | 12 | from .subtitles import SubtitlesInfoExtractor |
1cc79574 | 13 | from ..compat import ( |
c8434e83 | 14 | compat_urllib_parse, |
15 | compat_urllib_request, | |
1cc79574 PH |
16 | ) |
17 | from ..utils import ( | |
18 | ExtractorError, | |
c8434e83 | 19 | bytes_to_intlist, |
20 | intlist_to_bytes, | |
21 | unified_strdate, | |
723e04d0 | 22 | urlencode_postdata, |
c8434e83 | 23 | ) |
24 | from ..aes import ( | |
25 | aes_cbc_decrypt, | |
26 | inc, | |
27 | ) | |
8230018c | 28 | from .common import InfoExtractor |
c8434e83 | 29 | |
34440095 | 30 | |
class CrunchyrollIE(SubtitlesInfoExtractor):
    """Extractor for a single Crunchyroll media page.

    Accepts desktop (``www``) and mobile (``m``) URLs, queries the site's XML
    RPC endpoint once per advertised quality to obtain the stream host and
    play path, and decrypts the site's subtitle scripts into SRT or ASS text.
    """

    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
    _TESTS = [{
        'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
        'info_dict': {
            'id': '645513',
            'ext': 'flv',
            'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
            'description': 'md5:2d17137920c64f2f49981a7797d275ef',
            'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
            'uploader': 'Yomiuri Telecasting Corporation (YTV)',
            'upload_date': '20131013',
            'url': 're:(?!.*&)',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
    }, {
        'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
        'only_matching': True,
    }]

    # Page quality label -> (video_encode_quality, video_format) values sent
    # to the RpcApiVideoEncode_GetStreamInfo request.
    _FORMAT_IDS = {
        '360': ('60', '106'),
        '480': ('61', '106'),
        '720': ('62', '106'),
        '1080': ('80', '108'),
    }

    def _login(self):
        """Submit the login form if credentials are configured; otherwise do nothing."""
        (username, password) = self._get_login_info()
        if username is None:
            return
        self.report_login()
        login_url = 'https://www.crunchyroll.com/?a=formhandler'
        data = urlencode_postdata({
            'formname': 'RpcApiUser_Login',
            'name': username,
            'password': password,
        })
        login_request = compat_urllib_request.Request(login_url, data)
        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        self._download_webpage(login_request, None, False, 'Wrong login info')

    def _real_initialize(self):
        """Log in once before any extraction takes place."""
        self._login()

    def _decrypt_subtitles(self, data, iv, id):
        """Decrypt and decompress one encrypted subtitle script.

        data -- encrypted payload as bytes (already base64-decoded)
        iv   -- AES initialisation vector as bytes (already base64-decoded)
        id   -- numeric subtitle id (int or numeric string); seeds the key

        Returns the zlib-decompressed plaintext as bytes.
        """
        data = bytes_to_intlist(data)
        iv = bytes_to_intlist(iv)
        id = int(id)

        def obfuscate_key_aux(count, modulo, start):
            # Fibonacci-style sequence seeded with `start`; the two seed
            # values are dropped and every remaining term is reduced to
            # `term % modulo + 33`.
            output = list(start)
            for _ in range(count):
                output.append(output[-1] + output[-2])
            # cut off start values
            output = output[2:]
            return [x % modulo + 33 for x in output]

        def obfuscate_key(key):
            # Mix the subtitle id with a fixed constant, then hash it
            # together with the generated prefix.
            num1 = int(floor(pow(2, 25) * sqrt(6.9)))
            num2 = (num1 ^ key) << 5
            num3 = key ^ num1
            num4 = num3 ^ (num3 >> 3) ^ num2
            prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
            shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
            # Extend 160 Bit hash to 256 Bit
            return shaHash + [0] * 12

        key = obfuscate_key(id)

        decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
        return zlib.decompress(decrypted_data)

    def _convert_subtitles_to_srt(self, sub_root):
        """Render the <events> of a parsed subtitle script as SRT text.

        sub_root -- root element of the decrypted subtitle XML

        Cues are numbered from 1; '.' in timestamps becomes ',' and the
        literal two-character sequence \\N becomes a line break.
        """
        cues = []
        for i, event in enumerate(sub_root.findall('./events/event'), 1):
            start = event.attrib['start'].replace('.', ',')
            end = event.attrib['end'].replace('.', ',')
            text = event.attrib['text'].replace('\\N', '\n')
            cues.append('%d\n%s --> %s\n%s\n\n' % (i, start, end, text))
        return ''.join(cues)

    def _convert_subtitles_to_ass(self, sub_root):
        """Render a parsed subtitle script as an ASS (v4.00+) document.

        sub_root -- root element of the decrypted subtitle XML

        Raises KeyError if a required XML attribute is missing.
        """
        def ass_bool(strvalue):
            # The source XML uses '1' for true; ASS expects '-1'.
            return '-1' if strvalue == '1' else '0'

        # XML attribute names in the column order of the "Format:" lines.
        style_fields = (
            'name', 'font_name', 'font_size', 'primary_colour',
            'secondary_colour', 'outline_colour', 'back_colour',
            'bold', 'italic', 'underline', 'strikeout',
            'scale_x', 'scale_y', 'spacing', 'angle', 'border_style',
            'outline', 'shadow', 'alignment',
            'margin_l', 'margin_r', 'margin_v', 'encoding',
        )
        bool_fields = ('bold', 'italic', 'underline', 'strikeout')
        event_fields = (
            'start', 'end', 'style', 'name',
            'margin_l', 'margin_r', 'margin_v', 'effect', 'text',
        )

        parts = [
            '[Script Info]\n',
            'Title: %s\n' % sub_root.attrib['title'],
            'ScriptType: v4.00+\n',
            'WrapStyle: %s\n' % sub_root.attrib['wrap_style'],
            'PlayResX: %s\n' % sub_root.attrib['play_res_x'],
            'PlayResY: %s\n' % sub_root.attrib['play_res_y'],
            'ScaledBorderAndShadow: yes\n',
            '\n',
            '[V4+ Styles]\n',
            'Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n',
        ]

        for style in sub_root.findall('./styles/style'):
            values = []
            for field in style_fields:
                value = style.attrib[field]
                if field in bool_fields:
                    value = ass_bool(value)
                values.append(value)
            parts.append('Style: ' + ','.join(values) + '\n')

        parts.append('\n')
        parts.append('[Events]\n')
        parts.append('Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n')

        for event in sub_root.findall('./events/event'):
            values = [event.attrib[field] for field in event_fields]
            parts.append('Dialogue: 0,' + ','.join(values) + '\n')

        return ''.join(parts)

    def _real_extract(self, url):
        """Extract metadata, formats and subtitles for one media page.

        Returns the info dict, or None when only a subtitle listing was
        requested. Raises ExtractorError when the page carries a trailer
        notice or an error message box.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        # Mobile pages are only used to look up the canonical desktop URL.
        if mobj.group('prefix') == 'm':
            mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
            webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
        else:
            webpage_url = 'http://www.' + mobj.group('url')

        webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
        note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='')
        if note_m:
            raise ExtractorError(note_m)

        mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
        if mobj:
            msg = json.loads(mobj.group('msg'))
            if msg.get('type') == 'error':
                raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)

        video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
        video_title = re.sub(r' {2,}', ' ', video_title)
        video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
        if not video_description:
            video_description = None
        video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
        if video_upload_date:
            video_upload_date = unified_strdate(video_upload_date)
        video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)

        playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
        # POST bodies must be bytes: Python 3's urllib rejects text strings.
        playerdata_req = compat_urllib_request.Request(
            playerdata_url,
            compat_urllib_parse.urlencode({'current_page': webpage_url}).encode('utf-8'))
        playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')

        stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
        video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)

        formats = []
        for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
            format_ids = self._FORMAT_IDS.get(fmt)
            if format_ids is None:
                # The page advertises a quality we have no stream ids for;
                # skip it instead of aborting the extraction with KeyError.
                continue
            stream_quality, stream_format = format_ids
            video_format = fmt + 'p'
            # urlencode doesn't work!
            streamdata_body = (
                'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality +
                '&media%5Fid=' + stream_id +
                '&video%5Fformat=' + stream_format).encode('utf-8')
            streamdata_req = compat_urllib_request.Request(
                'http://www.crunchyroll.com/xml/', streamdata_body)
            streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
            streamdata_req.add_header('Content-Length', str(len(streamdata_body)))
            streamdata = self._download_xml(
                streamdata_req, video_id,
                note='Downloading media info for %s' % video_format)
            video_url = streamdata.find('.//host').text
            video_play_path = streamdata.find('.//file').text
            formats.append({
                'url': video_url,
                'play_path': video_play_path,
                'ext': 'flv',
                'format': video_format,
                'format_id': video_format,
            })

        subtitles = {}
        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
        for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
            sub_page = self._download_webpage(
                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
                video_id, note='Downloading subtitles for ' + sub_name)
            subtitle_id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
            iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
            data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
            # Without all three pieces the script cannot be decrypted.
            if not subtitle_id or not iv or not data:
                continue
            subtitle_id = int(subtitle_id)
            iv = base64.b64decode(iv)
            data = base64.b64decode(data)

            subtitle = self._decrypt_subtitles(data, iv, subtitle_id).decode('utf-8')
            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
            if not lang_code:
                continue
            sub_root = xml.etree.ElementTree.fromstring(subtitle)
            if sub_format == 'ass':
                subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
            else:
                subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, subtitles)
            return

        return {
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'thumbnail': video_thumbnail,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'subtitles': subtitles,
            'formats': formats,
        }
8230018c GS |
291 | |
292 | ||
class CrunchyrollShowPlaylistIE(InfoExtractor):
    """Extractor that turns a Crunchyroll show page into a playlist of episode URLs."""

    IE_NAME = "crunchyroll:playlist"
    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'

    _TESTS = [{
        'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
        'info_dict': {
            'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
            'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
        },
        'playlist_count': 13,
    }]

    def _real_extract(self, url):
        """Return a playlist dict whose entries are the show's episode pages.

        Episode links are collected in page order and then reversed before
        being returned.
        """
        show_id = self._match_id(url)
        page = self._download_webpage(url, show_id)

        show_title = self._html_search_regex(
            r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
            page, 'title')

        hrefs = re.findall(
            r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"',
            page)

        in_page_order = []
        for href in hrefs:
            in_page_order.append(
                self.url_result('http://www.crunchyroll.com' + href, 'Crunchyroll'))

        return {
            '_type': 'playlist',
            'id': show_id,
            'title': show_title,
            'entries': in_page_order[::-1],
        }