]>
Commit | Line | Data |
---|---|---|
c8434e83 | 1 | # encoding: utf-8 |
38a40276 | 2 | from __future__ import unicode_literals |
3 | ||
34440095 | 4 | import re |
1d430674 | 5 | import json |
34440095 S |
6 | import base64 |
7 | import zlib | |
78272a07 | 8 | import xml.etree.ElementTree |
34440095 | 9 | |
c8434e83 | 10 | from hashlib import sha1 |
11 | from math import pow, sqrt, floor | |
b5857f62 | 12 | from .common import InfoExtractor |
1cc79574 | 13 | from ..compat import ( |
c8434e83 | 14 | compat_urllib_parse, |
a60cccbf | 15 | compat_urllib_parse_unquote, |
c8434e83 | 16 | compat_urllib_request, |
1cc79574 PH |
17 | ) |
18 | from ..utils import ( | |
19 | ExtractorError, | |
c8434e83 | 20 | bytes_to_intlist, |
21 | intlist_to_bytes, | |
22 | unified_strdate, | |
723e04d0 | 23 | urlencode_postdata, |
c8434e83 | 24 | ) |
25 | from ..aes import ( | |
26 | aes_cbc_decrypt, | |
c8434e83 | 27 | ) |
28 | ||
34440095 | 29 | |
class CrunchyrollIE(InfoExtractor):
    """Extractor for a single Crunchyroll episode page.

    Downloads the episode page, pulls title/description/uploader metadata
    out of the HTML, asks the XML API for one RTMP stream per advertised
    resolution and decrypts the attached subtitle scripts.
    """

    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
    _NETRC_MACHINE = 'crunchyroll'
    _TESTS = [{
        'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
        'info_dict': {
            'id': '645513',
            'ext': 'flv',
            'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
            'description': 'md5:2d17137920c64f2f49981a7797d275ef',
            'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
            'uploader': 'Yomiuri Telecasting Corporation (YTV)',
            'upload_date': '20131013',
            'url': 're:(?!.*&)',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
    }, {
        'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
        'info_dict': {
            'id': '589804',
            'ext': 'flv',
            'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
            'description': 'md5:fe2743efedb49d279552926d0bd0cd9e',
            # raw string: same value as before, without the invalid '\.' escape
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Danny Choo Network',
            'upload_date': '20120213',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
    }, {
        'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
        'only_matching': True,
    }]

    # Resolution advertised on the page -> (video_quality, video_format)
    # query values sent to RpcApiVideoPlayer_GetStandardConfig.
    _FORMAT_IDS = {
        '360': ('60', '106'),
        '480': ('61', '106'),
        '720': ('62', '106'),
        '1080': ('80', '108'),
    }

    def _login(self):
        """Post the configured credentials to the login form handler.

        Returns without doing anything when no username is available.
        """
        (username, password) = self._get_login_info()
        if username is None:
            return
        self.report_login()
        login_url = 'https://www.crunchyroll.com/?a=formhandler'
        data = urlencode_postdata({
            'formname': 'RpcApiUser_Login',
            'name': username,
            'password': password,
        })
        login_request = compat_urllib_request.Request(login_url, data)
        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        self._download_webpage(login_request, None, False, 'Wrong login info')

    def _real_initialize(self):
        """Attempt a login before any extraction happens."""
        self._login()

    def _decrypt_subtitles(self, data, iv, id):
        """Decrypt and decompress one subtitle script.

        data -- base64 text of the AES-CBC encrypted, zlib-compressed script
        iv   -- base64 text of the initialisation vector
        id   -- numeric subtitle id (int or digit string) the key is derived
                from; the parameter name shadows the builtin but is kept so
                existing callers are unaffected

        Returns the decompressed script as bytes.
        """
        data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
        iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
        subtitle_id = int(id)

        def obfuscate_key_aux(count, modulo, start):
            # Fibonacci-style sequence seeded with the two start values
            output = list(start)
            for _ in range(count):
                output.append(output[-1] + output[-2])
            # cut off start values
            output = output[2:]
            return [x % modulo + 33 for x in output]

        def obfuscate_key(key):
            num1 = int(floor(pow(2, 25) * sqrt(6.9)))
            num2 = (num1 ^ key) << 5
            num3 = key ^ num1
            num4 = num3 ^ (num3 >> 3) ^ num2
            prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
            sha_hash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
            # Extend 160 Bit hash to 256 Bit
            return sha_hash + [0] * 12

        key = obfuscate_key(subtitle_id)

        decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
        return zlib.decompress(decrypted_data)

    def _convert_subtitles_to_srt(self, sub_root):
        """Render the <events> of a parsed subtitle script as SRT text.

        Cues are numbered from 1; '.' in the timestamps becomes ',' and the
        literal two-character sequence \\N in the text becomes a newline.
        """
        cues = []
        for i, event in enumerate(sub_root.findall('./events/event'), 1):
            start = event.attrib['start'].replace('.', ',')
            end = event.attrib['end'].replace('.', ',')
            text = event.attrib['text'].replace('\\N', '\n')
            cues.append('%d\n%s --> %s\n%s\n\n' % (i, start, end, text))
        return ''.join(cues)

    def _convert_subtitles_to_ass(self, sub_root):
        """Render a parsed subtitle script as an ASS (v4.00+) document.

        Every attribute read below is mandatory: a missing one raises
        KeyError, exactly as a direct attrib[...] lookup does.
        """
        # Attribute order must follow the "Format:" header lines emitted below.
        style_fields = (
            'name', 'font_name', 'font_size', 'primary_colour',
            'secondary_colour', 'outline_colour', 'back_colour', 'bold',
            'italic', 'underline', 'strikeout', 'scale_x', 'scale_y',
            'spacing', 'angle', 'border_style', 'outline', 'shadow',
            'alignment', 'margin_l', 'margin_r', 'margin_v', 'encoding',
        )
        bool_fields = ('bold', 'italic', 'underline', 'strikeout')
        event_fields = (
            'start', 'end', 'style', 'name', 'margin_l', 'margin_r',
            'margin_v', 'effect', 'text',
        )

        def ass_bool(strvalue):
            # ASS encodes "true" as -1; anything other than '1' is false
            return '-1' if strvalue == '1' else '0'

        def style_value(style, field):
            value = style.attrib[field]
            return ass_bool(value) if field in bool_fields else value

        parts = [
            '[Script Info]\n',
            'Title: %s\n' % sub_root.attrib['title'],
            'ScriptType: v4.00+\n',
            'WrapStyle: %s\n' % sub_root.attrib['wrap_style'],
            'PlayResX: %s\n' % sub_root.attrib['play_res_x'],
            'PlayResY: %s\n' % sub_root.attrib['play_res_y'],
            'ScaledBorderAndShadow: yes\n',
            '\n',
            '[V4+ Styles]\n',
            'Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n',
        ]
        for style in sub_root.findall('./styles/style'):
            parts.append(
                'Style: ' + ','.join(style_value(style, f) for f in style_fields) + '\n')

        parts.extend([
            '\n',
            '[Events]\n',
            'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n',
        ])
        for event in sub_root.findall('./events/event'):
            parts.append(
                'Dialogue: 0,' + ','.join(event.attrib[f] for f in event_fields) + '\n')

        return ''.join(parts)

    def _extract_subtitles(self, subtitle):
        """Parse a decrypted subtitle script and return it as SRT and ASS."""
        sub_root = xml.etree.ElementTree.fromstring(subtitle)
        return [{
            'ext': 'srt',
            'data': self._convert_subtitles_to_srt(sub_root),
        }, {
            'ext': 'ass',
            'data': self._convert_subtitles_to_ass(sub_root),
        }]

    def _get_subtitles(self, video_id, webpage):
        """Download and decrypt every subtitle script linked from the page.

        Returns a dict mapping language code to the list produced by
        _extract_subtitles. Scripts missing an id, iv, data or language
        code are skipped.
        """
        subtitles = {}
        for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
            sub_page = self._download_webpage(
                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
                video_id, note='Downloading subtitles for ' + sub_name)
            # named script_id rather than id so the builtin is not shadowed
            script_id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
            iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
            data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
            if not script_id or not iv or not data:
                continue
            subtitle = self._decrypt_subtitles(data, iv, script_id).decode('utf-8')
            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
            if not lang_code:
                continue
            subtitles[lang_code] = self._extract_subtitles(subtitle)
        return subtitles

    def _real_extract(self, url):
        """Extract metadata, stream formats and subtitles for one episode.

        Raises ExtractorError when the page carries a trailer notice or an
        error item in its messaging box.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        if mobj.group('prefix') == 'm':
            # the mobile page only serves to find the canonical desktop URL
            mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
            webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
        else:
            webpage_url = 'http://www.' + mobj.group('url')

        webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
        note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='')
        if note_m:
            raise ExtractorError(note_m)

        mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
        if mobj:
            msg = json.loads(mobj.group('msg'))
            if msg.get('type') == 'error':
                raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)

        video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
        video_title = re.sub(r' {2,}', ' ', video_title)
        video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
        if not video_description:
            video_description = None
        video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
        if video_upload_date:
            video_upload_date = unified_strdate(video_upload_date)
        video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)

        playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
        playerdata_req = compat_urllib_request.Request(playerdata_url)
        # POST bodies must be bytes on Python 3; encode like the stream
        # request further down does.
        playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}).encode('utf-8')
        playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')

        stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
        video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)

        formats = []
        for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
            if fmt not in self._FORMAT_IDS:
                # no quality/format ids known for this resolution; skip it
                # instead of aborting the whole extraction with a KeyError
                continue
            stream_quality, stream_format = self._FORMAT_IDS[fmt]
            video_format = fmt + 'p'
            streamdata_req = compat_urllib_request.Request(
                'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
                % (stream_id, stream_format, stream_quality),
                compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))
            streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
            streamdata = self._download_xml(
                streamdata_req, video_id,
                note='Downloading media info for %s' % video_format)
            stream_info = streamdata.find('./{default}preload/stream_info')
            if stream_info is None:
                # response carries no stream for this quality
                continue
            host_el = stream_info.find('./host')
            file_el = stream_info.find('./file')
            if host_el is None or file_el is None:
                continue
            formats.append({
                'url': host_el.text,
                'play_path': file_el.text,
                'ext': 'flv',
                'format': video_format,
                'format_id': video_format,
            })

        subtitles = self.extract_subtitles(video_id, webpage)

        return {
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'thumbnail': video_thumbnail,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'subtitles': subtitles,
            'formats': formats,
        }
8230018c GS |
302 | |
303 | ||
class CrunchyrollShowPlaylistIE(InfoExtractor):
    """Extractor that turns a Crunchyroll show page into a playlist."""

    IE_NAME = "crunchyroll:playlist"
    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'

    _TESTS = [{
        'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
        'info_dict': {
            'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
            'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
        },
        'playlist_count': 13,
    }]

    def _real_extract(self, url):
        """Return a playlist dict with one url_result entry per episode link.

        Entries are emitted in the reverse of the order in which the links
        appear on the show page.
        """
        show_id = self._match_id(url)
        webpage = self._download_webpage(url, show_id)

        title = self._html_search_regex(
            r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
            webpage, 'title')

        episode_paths = re.findall(
            r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"',
            webpage)
        entries = [
            self.url_result('http://www.crunchyroll.com' + path, 'Crunchyroll')
            for path in reversed(episode_paths)
        ]

        return {
            '_type': 'playlist',
            'id': show_id,
            'title': title,
            'entries': entries,
        }