2 from __future__
import unicode_literals
4 from .common
import InfoExtractor
14 from ..compat
import compat_HTTPError
# Shared base class for the AnimeLab extractors in this file: it carries the
# login settings and the login routine.
# NOTE(review): this view of the file is token-split and some statements are
# not visible (the embedded statement numbers skip, e.g. 20 -> 23, 35 -> 43);
# the comments below describe only what is visible.
17 class AnimeLabBaseIE(InfoExtractor
):
# When true, a missing username aborts via raise_login_required (see the
# username check further down).
18 _LOGIN_REQUIRED
= True
# Page that is downloaded to detect the login state; the form-encoded login
# data is sent to the same URL.
19 _LOGIN_URL
= 'https://www.animelab.com/login'
# NOTE(review): presumably the .netrc machine name looked up by
# _get_login_info -- confirm against InfoExtractor.
20 _NETRC_MACHINE
= 'animelab'
# Local predicate: a page counts as "logged in" when the text 'Sign In' does
# not occur in it.
# NOTE(review): the header of the method that encloses this helper is not
# visible in this view.
23 def is_logged_in(login_webpage
):
24 return 'Sign In' not in login_webpage
# Fetch the login page first so that an already authenticated session can be
# detected before any credentials are looked up.
26 login_page
= self
._download
_webpage
(
27 self
._LOGIN
_URL
, None, 'Downloading login page')
29 # Check if already logged in
# NOTE(review): the statement guarded by this check is not visible here.
30 if is_logged_in(login_page
):
# Obtain the (username, password) pair from _get_login_info.
33 (username
, password
) = self
._get
_login
_info
()
# Without a username, and with login marked as required, stop with the
# dedicated "login required" error.
34 if username
is None and self
._LOGIN
_REQUIRED
:
35 self
.raise_login_required('Login is required to access any AnimeLab content')
# Submit the credentials as application/x-www-form-urlencoded data.
# NOTE(review): the definition of login_form and the `try:` that pairs with
# the `except` below are not visible in this view.
43 response
= self
._download
_webpage
(
44 self
._LOGIN
_URL
, None, 'Logging in', 'Wrong login info',
45 data
=urlencode_postdata(login_form
),
46 headers
={'Content-Type': 'application/x-www-form-urlencoded'}
)
# An ExtractorError caused by an HTTP 400 response is re-reported as an
# expected "wrong credentials" error.
# NOTE(review): what happens for other causes is not visible here.
47 except ExtractorError
as e
:
48 if isinstance(e
.cause
, compat_HTTPError
) and e
.cause
.code
== 400:
49 raise ExtractorError('Unable to log in (wrong credentials?)', expected
=True)
53 # if login was successful
# The response page is run through the same 'Sign In' check as the initial
# login page.
54 if is_logged_in(response
):
# Raised when the response still contains 'Sign In', i.e. the login could
# not be verified.
57 raise ExtractorError('Unable to login (cannot verify if logged in)')
# NOTE(review): the body of this hook is not visible in this view; presumably
# it triggers the login routine above -- confirm.
59 def _real_initialize(self
):
# Extractor for a single AnimeLab player page (URLs of the form
# animelab.com/player/<id>).
# NOTE(review): this view of the file is token-split and several statements
# are not visible (container initialisers, some branch conditions, the return
# statement); the comments below describe only what is visible.
63 class AnimeLabIE(AnimeLabBaseIE
):
# The named group `id` captures the path segment after /player/;
# _real_extract reads it through _match_id and uses it as display_id.
64 _VALID_URL
= r
'https?://(?:www\.)?animelab\.com/player/(?P<id>[^/]+)'
66 # the following tests require authentication, but a free account will suffice
67 # just set 'usenetrc' to true in test/local_parameters.json if you use a .netrc file
68 # or you can set 'username' and 'password' there
69 # the tests also select a specific format so that the same video is downloaded
70 # regardless of whether the user is premium or not (needs testing on a premium account)
72 'url': 'https://www.animelab.com/player/fullmetal-alchemist-brotherhood-episode-42',
73 'md5': '05bde4b91a5d1ff46ef5b94df05b0f7f',
77 'display_id': 'fullmetal-alchemist-brotherhood-episode-42',
78 'title': 'Fullmetal Alchemist: Brotherhood - Episode 42 - Signs of a Counteroffensive',
79 'description': 'md5:103eb61dd0a56d3dfc5dbf748e5e83f4',
80 'series': 'Fullmetal Alchemist: Brotherhood',
81 'episode': 'Signs of a Counteroffensive',
89 'format': '[format_id=21711_yeshardsubbed_ja-JP][height=480]',
91 'skip': 'All AnimeLab content requires authentication',
# Collect metadata, thumbnails and formats for the episode addressed by `url`.
94 def _real_extract(self
, url
):
95 display_id
= self
._match
_id
(url
)
97 # unfortunately we can get different URLs for the same formats
98 # e.g. if we are using a "free" account so no dubs available
99 # (so _remove_duplicate_formats is not effective)
100 # so we use a dictionary as a workaround
# The player page is downloaded once per language option (subtitles, then
# dubbed); the statements below run for each of the two pages.
102 for language_option_url
in ('https://www.animelab.com/player/%s/subtitles',
103 'https://www.animelab.com/player/%s/dubbed'):
104 actual_url
= language_option_url
% display_id
105 webpage
= self
._download
_webpage
(actual_url
, display_id
, 'Downloading URL ' + actual_url
)
# The page embeds its playlist as the argument of
# `new AnimeLabApp.VideoCollection(...)`; that argument is parsed as JSON.
107 video_collection
= self
._parse
_json
(self
._search
_regex
(r
'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage
, 'AnimeLab VideoCollection'), display_id
)
# playlistPosition is used below as the index of the current entry inside
# the parsed collection.
108 position
= int_or_none(self
._search
_regex
(r
'playlistPosition\s*?=\s*?(\d+)', webpage
, 'Playlist Position'))
110 raw_data
= video_collection
[position
]['videoEntry']
112 video_id
= str_or_none(raw_data
['id'])
114 # create a title from many sources (while grabbing other info)
115 # TODO use more fallback sources to get some of these
116 series
= raw_data
.get('showTitle')
# NOTE(review): the `{}` default only covers a missing 'videoEntryType' key;
# an explicit null value would make the chained .get raise AttributeError
# (compare the `or {}` guard used for 'season' further down).
117 video_type
= raw_data
.get('videoEntryType', {}).get('name')
118 episode_number
= raw_data
.get('episodeNumber')
119 episode_name
= raw_data
.get('name')
# The composed "series - type number - name" title is only built when none of
# the four parts is None.
# NOTE(review): the fallback for a missing part is not visible in this view.
121 title_parts
= (series
, video_type
, episode_number
, episode_name
)
122 if None not in title_parts
:
123 title
= '%s - %s %s - %s' % title_parts
# Prefer the synopsis from the JSON data; otherwise fall back to the page's
# Open Graph description (None when that is absent too).
127 description
= raw_data
.get('synopsis') or self
._og
_search
_description
(webpage
, default
=None)
129 duration
= int_or_none(raw_data
.get('duration'))
# Every image instance of every entry in 'images' contributes one thumbnail
# record built from its 'imageInfo' mapping.
# NOTE(review): 'imageInstances' is indexed directly, so an image entry
# without that key raises KeyError; the statement that stores each record is
# not visible in this view.
131 thumbnail_data
= raw_data
.get('images', [])
133 for thumbnail
in thumbnail_data
:
134 for instance
in thumbnail
['imageInstances']:
135 image_data
= instance
.get('imageInfo', {})
137 'id': str_or_none(image_data
.get('id')),
138 'url': image_data
.get('fullPath'),
139 'width': image_data
.get('width'),
140 'height': image_data
.get('height'),
# `or {}` also covers a 'season' key whose value is an explicit null.
143 season_data
= raw_data
.get('season', {}) or {}
144 season
= str_or_none(season_data
.get('name'))
145 season_number
= int_or_none(season_data
.get('seasonNumber'))
146 season_id
= str_or_none(season_data
.get('id'))
# Each 'videoList' entry describes one language variant; its language code is
# copied into every format derived from it.
# NOTE(review): as with 'videoEntryType', the `{}` default does not guard an
# explicit null 'language' value.
148 for video_data
in raw_data
['videoList']:
149 current_video_list
= {}
150 current_video_list
['language'] = video_data
.get('language', {}).get('languageCode')
152 is_hardsubbed
= video_data
.get('hardSubbed')
# One candidate format per video instance; the HTTP URL is preferred and the
# RTMP URL is the fallback.
154 for video_instance
in video_data
['videoInstances']:
155 httpurl
= video_instance
.get('httpUrl')
156 url
= httpurl
if httpurl
else video_instance
.get('rtmpUrl')
158 # this video format is unavailable to the user (not premium etc.)
# Start from a copy so the per-language defaults are not mutated.
161 current_format
= current_video_list
.copy()
# The format id is assembled from the instance id, a hard-sub marker and the
# language code.
# NOTE(review): the initialiser of format_id_parts and the condition that
# chooses between the two hard-sub markers are not visible in this view.
165 format_id_parts
.append(str_or_none(video_instance
.get('id')))
167 if is_hardsubbed
is not None:
169 format_id_parts
.append('yeshardsubbed')
171 format_id_parts
.append('nothardsubbed')
173 format_id_parts
.append(current_format
['language'])
# Parts that are None are dropped before joining with '_'.
175 format_id
= '_'.join([x
for x
in format_id_parts
if x
is not None])
177 ext
= determine_ext(url
)
# Manifest URLs are expanded through the HLS / DASH helpers with fatal=False.
# Results are stored under their own format_id, so a later format with the
# same id replaces an earlier one (the dictionary workaround described at the
# top of this method).
# NOTE(review): the conditions selecting these two branches are not visible
# in this view.
179 for format_
in self
._extract
_m
3u8_formats
(
180 url
, video_id
, m3u8_id
=format_id
, fatal
=False):
181 formats
[format_
['format_id']] = format_
184 for format_
in self
._extract
_mpd
_formats
(
185 url
, video_id
, mpd_id
=format_id
, fatal
=False):
186 formats
[format_
['format_id']] = format_
# Direct URL: describe the format by hand.
189 current_format
['url'] = url
190 quality_data
= video_instance
.get('videoQuality')
192 quality
= quality_data
.get('name') or quality_data
.get('description')
# The height is taken from the trailing digits of the quality label, with an
# optional 'p' suffix.
198 height
= int_or_none(self
._search
_regex
(r
'(\d+)p?$', quality
, 'Video format height', default
=None))
201 self
.report_warning('Could not get height of video')
203 current_format
['height'] = height
204 current_format
['format_id'] = format_id
206 formats
[current_format
['format_id']] = current_format
# Convert the de-duplicating dictionary into the list that _sort_formats
# receives.
208 formats
= list(formats
.values())
209 self
._sort
_formats
(formats
)
# Entries of the result mapping.
# NOTE(review): the enclosing return statement and some entries are not
# visible in this view.
213 'display_id': display_id
,
215 'description': description
,
217 'episode': episode_name
,
218 'episode_number': int_or_none(episode_number
),
219 'thumbnails': thumbnails
,
220 'duration': duration
,
223 'season_number': season_number
,
224 'season_id': season_id
,
# Extractor for an AnimeLab show page (URLs of the form
# animelab.com/shows/<id>); it collects one url_result entry per episode of
# every season.
# NOTE(review): this view of the file is token-split and some statements are
# not visible (the initialiser of `entries`, part of the season download call,
# the return statement); the comments below describe only what is visible.
228 class AnimeLabShowsIE(AnimeLabBaseIE
):
# The named group `id` captures the path segment after /shows/; it is read
# through _match_id and used as display_id.
229 _VALID_URL
= r
'https?://(?:www\.)?animelab\.com/shows/(?P<id>[^/]+)'
232 'url': 'https://www.animelab.com/shows/attack-on-titan',
235 'title': 'Attack on Titan',
236 'description': 'md5:989d95a2677e9309368d5cf39ba91469',
238 'playlist_count': 59,
239 'skip': 'All AnimeLab content requires authentication',
# Build the list of episode entries for the show addressed by `url`.
242 def _real_extract(self
, url
):
# NOTE(review): this base URL is plain http, while the login URL and the test
# URLs in this file use https -- confirm whether https is intended here.
243 _BASE_URL
= 'http://www.animelab.com'
244 _SHOWS_API_URL
= '/api/videoentries/show/videos/'
245 display_id
= self
._match
_id
(url
)
247 webpage
= self
._download
_webpage
(url
, display_id
, 'Downloading requested URL')
# The show metadata is the JSON object that starts with {"id": and is
# followed in the page by a comma, one whitespace character and `videoEntry`.
249 show_data_str
= self
._search
_regex
(r
'({"id":.*}),\svideoEntry', webpage
, 'AnimeLab show data')
250 show_data
= self
._parse
_json
(show_data_str
, display_id
)
# NOTE(review): show_id is concatenated into target_url below; if
# str_or_none yields None for a missing 'id', that concatenation raises
# TypeError -- confirm 'id' is always present.
252 show_id
= str_or_none(show_data
.get('id'))
253 title
= show_data
.get('name')
# Prefer the short synopsis; fall back to the long one.
254 description
= show_data
.get('shortSynopsis') or show_data
.get('longSynopsis')
# One API request per season; the season id is passed as a query parameter.
257 for season
in show_data
['seasons']:
258 season_id
= season
['id']
259 get_data
= urlencode_postdata({
260 'seasonId': season_id
,
263 # despite using urlencode_postdata, we are sending a GET request
# The encoded data is decoded as UTF-8 and appended after '?' as the query
# string.
264 target_url
= _BASE_URL
+ _SHOWS_API_URL
+ show_id
+ "?" + get_data
.decode('utf-8')
# NOTE(review): the first argument of this download call is not visible in
# this view; presumably it is target_url -- confirm.
265 response
= self
._download
_webpage
(
267 None, 'Season id %s' % season_id
)
269 season_data
= self
._parse
_json
(response
, display_id
)
# Every item in the season's 'list' becomes a url_result that points at the
# item's /player/<slug> page and names 'AnimeLab' as the extractor.
271 for video_data
in season_data
['list']:
272 entries
.append(self
.url_result(
273 _BASE_URL
+ '/player/' + video_data
['slug'], 'AnimeLab',
274 str_or_none(video_data
.get('id')), video_data
.get('name')
# Entry of the result mapping.
# NOTE(review): the enclosing return statement and the remaining entries are
# not visible in this view.
281 'description': description
,
285 # TODO implement myqueue