# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import (
    compat_HTTPError,
    compat_urllib_parse_unquote_plus,
)
from ..utils import (
    clean_html,
    determine_ext,
    int_or_none,
    sanitized_Request,
    ExtractorError,
    urlencode_postdata
)


class FunimationIE(InfoExtractor):
    # Extractor for funimation.com "official" and "promotional" video pages.
    # The last path component of the URL is captured as the display id.
    _VALID_URL = r'https?://(?:www\.)?funimation\.com/shows/[^/]+/videos/(?:official|promotional)/(?P<id>[^/?#&]+)'

    _NETRC_MACHINE = 'funimation'

    # Thumbnail patterns are raw strings so that ``\.`` is not treated as an
    # (invalid) string escape sequence; the resulting values are unchanged.
    _TESTS = [{
        'url': 'http://www.funimation.com/shows/air/videos/official/breeze',
        'info_dict': {
            'id': '658',
            'display_id': 'breeze',
            'ext': 'mp4',
            'title': 'Air - 1 - Breeze',
            'description': 'md5:1769f43cd5fc130ace8fd87232207892',
            'thumbnail': r're:https?://.*\.jpg',
        },
        'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
    }, {
        'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
        'info_dict': {
            'id': '31128',
            'display_id': 'role-play',
            'ext': 'mp4',
            'title': '.hack//SIGN - 1 - Role Play',
            'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
            'thumbnail': r're:https?://.*\.jpg',
        },
        'skip': 'Access without user interaction is forbidden by CloudFlare',
    }, {
        'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
        'info_dict': {
            'id': '9635',
            'display_id': 'broadcast-dub-preview',
            'ext': 'mp4',
            'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
            'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
        },
        'skip': 'Access without user interaction is forbidden by CloudFlare',
    }]

    _LOGIN_URL = 'http://www.funimation.com/login'

    # Shared by _login() (as the fallback UA) and _real_extract() (as the
    # 'pc' UA) so the two cannot drift apart.
    _PC_USER_AGENT = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
    _MOBILE_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'

    # (kind, user agent) pairs tried in order when no CloudFlare session UA
    # is available.
    _USER_AGENTS = (
        # PC UA is served with m3u8 that provides some bonus lower quality formats
        ('pc', _PC_USER_AGENT),
        # Mobile UA allows to extract direct links and also does not fail when
        # PC UA fails with hulu error (e.g.
        # http://www.funimation.com/shows/hacksign/videos/official/role-play)
        ('mobile', _MOBILE_USER_AGENT),
    )

    # Maps the 'description' of an entry in the page's videoErrorMessages
    # object to the error key that may appear in place of a format URL.
    _ERRORS_MAP = {
        'ERROR_MATURE_CONTENT_LOGGED_IN': 'matureContentLoggedIn',
        'ERROR_MATURE_CONTENT_LOGGED_OUT': 'matureContentLoggedOut',
        'ERROR_SUBSCRIPTION_LOGGED_OUT': 'subscriptionLoggedOut',
        'ERROR_VIDEO_EXPIRED': 'videoExpired',
        'ERROR_TERRITORY_UNAVAILABLE': 'territoryUnavailable',
        'SVODBASIC_SUBSCRIPTION_IN_PLAYER': 'basicSubscription',
        'SVODNON_SUBSCRIPTION_IN_PLAYER': 'nonSubscription',
        'ERROR_PLAYER_NOT_RESPONDING': 'playerNotResponding',
        'ERROR_UNABLE_TO_CONNECT_TO_CDN': 'unableToConnectToCDN',
        'ERROR_STREAM_NOT_FOUND': 'streamNotFound',
    }

    # (key prefix of the '<quality>Url' field, height reported for it)
    _QUALITIES = (('sd', 480), ('hd', 720), ('hd1080', 1080))

    def _download_webpage(self, *args, **kwargs):
        # Wraps the base implementation to turn a CloudFlare security-check
        # page (HTTP 403 with a known marker in the body) into an expected
        # error with instructions for the user. Any other failure is
        # re-raised unchanged.
        try:
            return super(FunimationIE, self)._download_webpage(*args, **kwargs)
        except ExtractorError as ee:
            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
                response = ee.cause.read()
                if b'>Please complete the security check to access<' in response:
                    raise ExtractorError(
                        'Access to funimation.com is blocked by CloudFlare. '
                        'Please browse to http://www.funimation.com/, solve '
                        'the reCAPTCHA, export browser cookies to a text file,'
                        ' and then try again with --cookies YOUR_COOKIE_FILE.',
                        expected=True)
            raise

    def _extract_cloudflare_session_ua(self, url):
        # Returns the user agent stored in the 'ci_session' cookie for url,
        # or None when the cookie is absent or holds no user agent.
        ci_session_cookie = self._get_cookies(url).get('ci_session')
        if not ci_session_cookie:
            return None
        ci_session = compat_urllib_parse_unquote_plus(ci_session_cookie.value)
        # ci_session is a string serialized by PHP function serialize()
        # This case is simple enough to use regular expressions only
        return self._search_regex(
            r'"user_agent";s:\d+:"([^"]+)"', ci_session, 'user agent',
            default=None)

    def _login(self):
        # Posts the credentials from _get_login_info() to _LOGIN_URL.
        # Does nothing when no username is available. Raises ExtractorError
        # when the response contains neither logout marker; the site's own
        # error text is used for the message when it can be found.
        (username, password) = self._get_login_info()
        if username is None:
            return
        data = urlencode_postdata({
            'email_field': username,
            'password_field': password,
        })
        # Prefer the UA recorded in the session cookie, falling back to the
        # PC UA when there is none.
        user_agent = self._extract_cloudflare_session_ua(self._LOGIN_URL)
        if not user_agent:
            user_agent = self._PC_USER_AGENT
        login_request = sanitized_Request(self._LOGIN_URL, data, headers={
            'User-Agent': user_agent,
            'Content-Type': 'application/x-www-form-urlencoded'
        })
        login_page = self._download_webpage(
            login_request, None, 'Logging in as %s' % username)
        if any(p in login_page for p in ('funimation.com/logout', '>Log Out<')):
            return
        error = self._html_search_regex(
            r'(?s)<div[^>]+id=["\']errorMessages["\'][^>]*>(.+?)</div>',
            login_page, 'error messages', default=None)
        if error:
            raise ExtractorError('Unable to login: %s' % error, expected=True)
        raise ExtractorError('Unable to log in')

    def _real_initialize(self):
        self._login()

    def _extract_error_messages(self, webpage, display_id):
        # Builds {error key: message content} from the page's
        # 'videoErrorMessages' JavaScript object. Returns an empty dict when
        # the object is missing or cannot be parsed. Only entries of type
        # 'text' whose description is listed in _ERRORS_MAP are kept.
        error_messages = {}
        video_error_messages = self._search_regex(
            r'var\s+videoErrorMessages\s*=\s*({.+?});\n',
            webpage, 'error messages', default=None)
        if not video_error_messages:
            return error_messages
        error_messages_json = self._parse_json(
            video_error_messages, display_id, fatal=False)
        if not error_messages_json:
            return error_messages
        for error in error_messages_json.values():
            # Skip malformed entries instead of failing on .get()
            if not isinstance(error, dict):
                continue
            description = error.get('description')
            content = error.get('content')
            if error.get('type') == 'text' and description and content:
                error_message = self._ERRORS_MAP.get(description)
                if error_message:
                    error_messages[error_message] = content
        return error_messages

    def _real_extract(self, url):
        display_id = self._match_id(url)

        # Non-URL values found in place of format URLs (error keys)
        errors = []
        formats = []

        # When a CloudFlare session cookie records a user agent, use only
        # that one; otherwise try each of the default user agents.
        user_agents = self._USER_AGENTS
        session_user_agent = self._extract_cloudflare_session_ua(url)
        if session_user_agent:
            user_agents = ((None, session_user_agent),)

        for kind, user_agent in user_agents:
            request = sanitized_Request(url)
            request.add_header('User-Agent', user_agent)
            webpage = self._download_webpage(
                request, display_id,
                ('Downloading %s webpage' % kind) if kind else 'Downloading webpage')

            playlist = self._parse_json(
                self._search_regex(
                    r'var\s+playersData\s*=\s*(\[.+?\]);\n',
                    webpage, 'players data'),
                display_id)[0]['playlist']

            # First playlist entry that has items, then the item matching
            # this page's display id
            items = next(item['items'] for item in playlist if item.get('items'))
            item = next(item for item in items if item.get('itemAK') == display_id)

            error_messages = self._extract_error_messages(webpage, display_id)

            for video in item.get('videoSet', []):
                auth_token = video.get('authToken')
                if not auth_token:
                    continue
                funimation_id = video.get('FUNImationID') or video.get('videoId')
                preference = 1 if video.get('languageMode') == 'dub' else 0
                # The token is appended to format URLs as a query string
                if not auth_token.startswith('?'):
                    auth_token = '?%s' % auth_token
                for quality, height in self._QUALITIES:
                    format_url = video.get('%sUrl' % quality)
                    if not format_url:
                        continue
                    if not format_url.startswith(('http', '//')):
                        # Not a URL; remember it so it can be reported if
                        # no format is found at all
                        errors.append(format_url)
                        continue
                    if determine_ext(format_url) == 'm3u8':
                        formats.extend(self._extract_m3u8_formats(
                            format_url + auth_token, display_id, 'mp4', entry_protocol='m3u8_native',
                            preference=preference, m3u8_id='%s-hls' % funimation_id, fatal=False))
                    else:
                        # Bitrate is taken from a '-<digits>K' URL fragment
                        tbr = int_or_none(self._search_regex(
                            r'-(\d+)[Kk]', format_url, 'tbr', default=None))
                        formats.append({
                            'url': format_url + auth_token,
                            'format_id': '%s-http-%dp' % (funimation_id, height),
                            'height': height,
                            'tbr': tbr,
                            'preference': preference,
                        })

        if not formats and errors:
            # Report the first collected error, using the site's message for
            # it when one is known and the raw value otherwise
            raise ExtractorError(
                '%s returned error: %s'
                % (self.IE_NAME, clean_html(error_messages.get(errors[0], errors[0]))),
                expected=True)

        self._sort_formats(formats)

        # Metadata comes from the item and webpage of the last page fetched
        title = item['title']
        artist = item.get('artist')
        if artist:
            title = '%s - %s' % (artist, title)
        description = self._og_search_description(webpage) or item.get('description')
        thumbnail = self._og_search_thumbnail(webpage) or item.get('posterUrl')
        video_id = item.get('itemId') or display_id

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }