2 from __future__
import unicode_literals
4 from .common
import InfoExtractor
14 from ..compat
import compat_HTTPError
17 class AnimeLabBaseIE(InfoExtractor
):
18 _LOGIN_URL
= 'https://www.animelab.com/login'
19 _NETRC_MACHINE
= 'animelab'
22 def _is_logged_in(self
, login_page
=None):
23 if not self
._LOGGED
_IN
:
25 login_page
= self
._download
_webpage
(self
._LOGIN
_URL
, None, 'Downloading login page')
26 AnimeLabBaseIE
._LOGGED
_IN
= 'Sign In' not in login_page
27 return self
._LOGGED
_IN
29 def _perform_login(self
, username
, password
):
30 if self
._is
_logged
_in
():
39 response
= self
._download
_webpage
(
40 self
._LOGIN
_URL
, None, 'Logging in', 'Wrong login info',
41 data
=urlencode_postdata(login_form
),
42 headers
={'Content-Type': 'application/x-www-form-urlencoded'}
)
43 except ExtractorError
as e
:
44 if isinstance(e
.cause
, compat_HTTPError
) and e
.cause
.code
== 400:
45 raise ExtractorError('Unable to log in (wrong credentials?)', expected
=True)
48 if not self
._is
_logged
_in
(response
):
49 raise ExtractorError('Unable to login (cannot verify if logged in)')
51 def _real_initialize(self
):
52 if not self
._is
_logged
_in
():
53 self
.raise_login_required('Login is required to access any AnimeLab content')
56 class AnimeLabIE(AnimeLabBaseIE
):
57 _VALID_URL
= r
'https?://(?:www\.)?animelab\.com/player/(?P<id>[^/]+)'
59 # the following tests require authentication, but a free account will suffice
60 # just set 'usenetrc' to true in test/local_parameters.json if you use a .netrc file
61 # or you can set 'username' and 'password' there
62 # the tests also select a specific format so that the same video is downloaded
63 # regardless of whether the user is premium or not (needs testing on a premium account)
65 'url': 'https://www.animelab.com/player/fullmetal-alchemist-brotherhood-episode-42',
66 'md5': '05bde4b91a5d1ff46ef5b94df05b0f7f',
70 'display_id': 'fullmetal-alchemist-brotherhood-episode-42',
71 'title': 'Fullmetal Alchemist: Brotherhood - Episode 42 - Signs of a Counteroffensive',
72 'description': 'md5:103eb61dd0a56d3dfc5dbf748e5e83f4',
73 'series': 'Fullmetal Alchemist: Brotherhood',
74 'episode': 'Signs of a Counteroffensive',
82 'format': '[format_id=21711_yeshardsubbed_ja-JP][height=480]',
84 'skip': 'All AnimeLab content requires authentication',
87 def _real_extract(self
, url
):
88 display_id
= self
._match
_id
(url
)
90 # unfortunately we can get different URLs for the same formats
91 # e.g. if we are using a "free" account so no dubs available
92 # (so _remove_duplicate_formats is not effective)
93 # so we use a dictionary as a workaround
95 for language_option_url
in ('https://www.animelab.com/player/%s/subtitles',
96 'https://www.animelab.com/player/%s/dubbed'):
97 actual_url
= language_option_url
% display_id
98 webpage
= self
._download
_webpage
(actual_url
, display_id
, 'Downloading URL ' + actual_url
)
100 video_collection
= self
._parse
_json
(self
._search
_regex
(r
'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage
, 'AnimeLab VideoCollection'), display_id
)
101 position
= int_or_none(self
._search
_regex
(r
'playlistPosition\s*?=\s*?(\d+)', webpage
, 'Playlist Position'))
103 raw_data
= video_collection
[position
]['videoEntry']
105 video_id
= str_or_none(raw_data
['id'])
107 # create a title from many sources (while grabbing other info)
108 # TODO use more fallback sources to get some of these
109 series
= raw_data
.get('showTitle')
110 video_type
= raw_data
.get('videoEntryType', {}).get('name')
111 episode_number
= raw_data
.get('episodeNumber')
112 episode_name
= raw_data
.get('name')
114 title_parts
= (series
, video_type
, episode_number
, episode_name
)
115 if None not in title_parts
:
116 title
= '%s - %s %s - %s' % title_parts
120 description
= raw_data
.get('synopsis') or self
._og
_search
_description
(webpage
, default
=None)
122 duration
= int_or_none(raw_data
.get('duration'))
124 thumbnail_data
= raw_data
.get('images', [])
126 for thumbnail
in thumbnail_data
:
127 for instance
in thumbnail
['imageInstances']:
128 image_data
= instance
.get('imageInfo', {})
130 'id': str_or_none(image_data
.get('id')),
131 'url': image_data
.get('fullPath'),
132 'width': image_data
.get('width'),
133 'height': image_data
.get('height'),
136 season_data
= raw_data
.get('season', {}) or {}
137 season
= str_or_none(season_data
.get('name'))
138 season_number
= int_or_none(season_data
.get('seasonNumber'))
139 season_id
= str_or_none(season_data
.get('id'))
141 for video_data
in raw_data
['videoList']:
142 current_video_list
= {}
143 current_video_list
['language'] = video_data
.get('language', {}).get('languageCode')
145 is_hardsubbed
= video_data
.get('hardSubbed')
147 for video_instance
in video_data
['videoInstances']:
148 httpurl
= video_instance
.get('httpUrl')
149 url
= httpurl
if httpurl
else video_instance
.get('rtmpUrl')
151 # this video format is unavailable to the user (not premium etc.)
154 current_format
= current_video_list
.copy()
158 format_id_parts
.append(str_or_none(video_instance
.get('id')))
160 if is_hardsubbed
is not None:
162 format_id_parts
.append('yeshardsubbed')
164 format_id_parts
.append('nothardsubbed')
166 format_id_parts
.append(current_format
['language'])
168 format_id
= '_'.join([x
for x
in format_id_parts
if x
is not None])
170 ext
= determine_ext(url
)
172 for format_
in self
._extract
_m
3u8_formats
(
173 url
, video_id
, m3u8_id
=format_id
, fatal
=False):
174 formats
[format_
['format_id']] = format_
177 for format_
in self
._extract
_mpd
_formats
(
178 url
, video_id
, mpd_id
=format_id
, fatal
=False):
179 formats
[format_
['format_id']] = format_
182 current_format
['url'] = url
183 quality_data
= video_instance
.get('videoQuality')
185 quality
= quality_data
.get('name') or quality_data
.get('description')
191 height
= int_or_none(self
._search
_regex
(r
'(\d+)p?$', quality
, 'Video format height', default
=None))
194 self
.report_warning('Could not get height of video')
196 current_format
['height'] = height
197 current_format
['format_id'] = format_id
199 formats
[current_format
['format_id']] = current_format
201 formats
= list(formats
.values())
202 self
._sort
_formats
(formats
)
206 'display_id': display_id
,
208 'description': description
,
210 'episode': episode_name
,
211 'episode_number': int_or_none(episode_number
),
212 'thumbnails': thumbnails
,
213 'duration': duration
,
216 'season_number': season_number
,
217 'season_id': season_id
,
221 class AnimeLabShowsIE(AnimeLabBaseIE
):
222 _VALID_URL
= r
'https?://(?:www\.)?animelab\.com/shows/(?P<id>[^/]+)'
225 'url': 'https://www.animelab.com/shows/attack-on-titan',
228 'title': 'Attack on Titan',
229 'description': 'md5:989d95a2677e9309368d5cf39ba91469',
231 'playlist_count': 59,
232 'skip': 'All AnimeLab content requires authentication',
235 def _real_extract(self
, url
):
236 _BASE_URL
= 'http://www.animelab.com'
237 _SHOWS_API_URL
= '/api/videoentries/show/videos/'
238 display_id
= self
._match
_id
(url
)
240 webpage
= self
._download
_webpage
(url
, display_id
, 'Downloading requested URL')
242 show_data_str
= self
._search
_regex
(r
'({"id":.*}),\svideoEntry', webpage
, 'AnimeLab show data')
243 show_data
= self
._parse
_json
(show_data_str
, display_id
)
245 show_id
= str_or_none(show_data
.get('id'))
246 title
= show_data
.get('name')
247 description
= show_data
.get('shortSynopsis') or show_data
.get('longSynopsis')
250 for season
in show_data
['seasons']:
251 season_id
= season
['id']
252 get_data
= urlencode_postdata({
253 'seasonId': season_id
,
256 # despite using urlencode_postdata, we are sending a GET request
257 target_url
= _BASE_URL
+ _SHOWS_API_URL
+ show_id
+ "?" + get_data
.decode('utf-8')
258 response
= self
._download
_webpage
(
260 None, 'Season id %s' % season_id
)
262 season_data
= self
._parse
_json
(response
, display_id
)
264 for video_data
in season_data
['list']:
265 entries
.append(self
.url_result(
266 _BASE_URL
+ '/player/' + video_data
['slug'], 'AnimeLab',
267 str_or_none(video_data
.get('id')), video_data
.get('name')
274 'description': description
,
278 # TODO implement myqueue