from .common import InfoExtractor

from ..utils import (
    ExtractorError,
    urlencode_postdata,
    int_or_none,
    str_or_none,
    determine_ext,
)

from ..compat import compat_HTTPError


class AnimeLabBaseIE(InfoExtractor):
    _LOGIN_URL = 'https://www.animelab.com/login'
    _NETRC_MACHINE = 'animelab'
    _LOGGED_IN = False

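    # The logged-in state is detected by the absence of the 'Sign In' link on the
    # login page and cached on the class once a session has been confirmed.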
    def _is_logged_in(self, login_page=None):
        if not self._LOGGED_IN:
            if not login_page:
                login_page = self._download_webpage(self._LOGIN_URL, None, 'Downloading login page')
            AnimeLabBaseIE._LOGGED_IN = 'Sign In' not in login_page
        return self._LOGGED_IN

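    # Credentials come from --username/--password or the 'animelab' .netrc entry
    # (see _NETRC_MACHINE above); yt-dlp invokes this hook before extraction.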
    def _perform_login(self, username, password):
        if self._is_logged_in():
            return

        login_form = {
            'email': username,
            'password': password,
        }

        try:
            response = self._download_webpage(
                self._LOGIN_URL, None, 'Logging in', 'Wrong login info',
                data=urlencode_postdata(login_form),
                headers={'Content-Type': 'application/x-www-form-urlencoded'})
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                raise ExtractorError('Unable to log in (wrong credentials?)', expected=True)
            raise

        if not self._is_logged_in(response):
            raise ExtractorError('Unable to log in (cannot verify if logged in)')

    def _real_initialize(self):
        if not self._is_logged_in():
            self.raise_login_required('Login is required to access any AnimeLab content')


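# Extracts a single episode from an animelab.com player page.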
class AnimeLabIE(AnimeLabBaseIE):
    _VALID_URL = r'https?://(?:www\.)?animelab\.com/player/(?P<id>[^/]+)'

    _TEST = {
        'url': 'https://www.animelab.com/player/fullmetal-alchemist-brotherhood-episode-42',
        'md5': '05bde4b91a5d1ff46ef5b94df05b0f7f',
        'info_dict': {
            'id': '383',
            'ext': 'mp4',
            'display_id': 'fullmetal-alchemist-brotherhood-episode-42',
            'title': 'Fullmetal Alchemist: Brotherhood - Episode 42 - Signs of a Counteroffensive',
            'description': 'md5:103eb61dd0a56d3dfc5dbf748e5e83f4',
            'series': 'Fullmetal Alchemist: Brotherhood',
            'episode': 'Signs of a Counteroffensive',
            'episode_number': 42,
            'duration': 1469,
            'season': 'Season 1',
            'season_number': 1,
            'season_id': '38',
        },
        'params': {
            # Ensure the same video is downloaded whether the user is premium or not
            'format': '[format_id=21711_yeshardsubbed_ja-JP][height=480]',
        },
    }

    def _real_extract(self, url):
        display_id = self._match_id(url)

        # unfortunately we can get different URLs for the same formats
        # e.g. if we are using a "free" account so no dubs available
        # (so _remove_duplicate_formats is not effective)
        # so we use a dictionary as a workaround
        formats = {}
        for language_option_url in ('https://www.animelab.com/player/%s/subtitles',
                                    'https://www.animelab.com/player/%s/dubbed'):
            actual_url = language_option_url % display_id
            webpage = self._download_webpage(actual_url, display_id, 'Downloading URL ' + actual_url)

            video_collection = self._parse_json(self._search_regex(
                r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);',
                webpage, 'AnimeLab VideoCollection'), display_id)
            position = int_or_none(self._search_regex(
                r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))

            raw_data = video_collection[position]['videoEntry']

            video_id = str_or_none(raw_data['id'])

            # create a title from many sources (while grabbing other info)
            # TODO use more fallback sources to get some of these
            series = raw_data.get('showTitle')
            video_type = raw_data.get('videoEntryType', {}).get('name')
            episode_number = raw_data.get('episodeNumber')
            episode_name = raw_data.get('name')

            title_parts = (series, video_type, episode_number, episode_name)
            if None not in title_parts:
                title = '%s - %s %s - %s' % title_parts
            else:
                title = episode_name

            description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)

            duration = int_or_none(raw_data.get('duration'))

            thumbnail_data = raw_data.get('images', [])
            thumbnails = []
            for thumbnail in thumbnail_data:
                for instance in thumbnail['imageInstances']:
                    image_data = instance.get('imageInfo', {})
                    thumbnails.append({
                        'id': str_or_none(image_data.get('id')),
                        'url': image_data.get('fullPath'),
                        'width': image_data.get('width'),
                        'height': image_data.get('height'),
                    })

            season_data = raw_data.get('season', {}) or {}
            season = str_or_none(season_data.get('name'))
            season_number = int_or_none(season_data.get('seasonNumber'))
            season_id = str_or_none(season_data.get('id'))

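            # Each videoList entry is one language variant of the episode (e.g.
            # subtitled vs dubbed), each with its own set of video instances.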
            for video_data in raw_data['videoList']:
                current_video_list = {}
                current_video_list['language'] = video_data.get('language', {}).get('languageCode')

                is_hardsubbed = video_data.get('hardSubbed')

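                # Each videoInstance is a single delivery of this variant: either a
                # direct HTTP/RTMP URL or an HLS/DASH manifest, at one quality level.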
                for video_instance in video_data['videoInstances']:
                    httpurl = video_instance.get('httpUrl')
                    url = httpurl if httpurl else video_instance.get('rtmpUrl')
                    if url is None:
                        # this video format is unavailable to the user (not premium etc.)
                        continue

                    current_format = current_video_list.copy()

                    format_id_parts = []

                    format_id_parts.append(str_or_none(video_instance.get('id')))

                    if is_hardsubbed is not None:
                        if is_hardsubbed:
                            format_id_parts.append('yeshardsubbed')
                        else:
                            format_id_parts.append('nothardsubbed')

                    format_id_parts.append(current_format['language'])

                    format_id = '_'.join([x for x in format_id_parts if x is not None])

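                    # Manifest URLs expand into several formats of their own; keying
                    # the dict by format_id deduplicates them across the
                    # subtitles/dubbed passes.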
                    ext = determine_ext(url)
                    if ext == 'm3u8':
                        for format_ in self._extract_m3u8_formats(
                                url, video_id, m3u8_id=format_id, fatal=False):
                            formats[format_['format_id']] = format_
                        continue
                    elif ext == 'mpd':
                        for format_ in self._extract_mpd_formats(
                                url, video_id, mpd_id=format_id, fatal=False):
                            formats[format_['format_id']] = format_
                        continue

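                    # Direct URL: record it and derive the height from the textual
                    # quality label (e.g. '480p').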
                    current_format['url'] = url
                    quality_data = video_instance.get('videoQuality')
                    if quality_data:
                        quality = quality_data.get('name') or quality_data.get('description')
                    else:
                        quality = None

                    height = None
                    if quality:
                        height = int_or_none(self._search_regex(
                            r'(\d+)p?$', quality, 'Video format height', default=None))

                    if height is None:
                        self.report_warning('Could not get height of video')
                    else:
                        current_format['height'] = height
                    current_format['format_id'] = format_id

                    formats[current_format['format_id']] = current_format

        formats = list(formats.values())
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'series': series,
            'episode': episode_name,
            'episode_number': int_or_none(episode_number),
            'thumbnails': thumbnails,
            'duration': duration,
            'formats': formats,
            'season': season,
            'season_number': season_number,
            'season_id': season_id,
        }


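# Extracts a show page as a playlist of per-episode entries handled by AnimeLabIE.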
class AnimeLabShowsIE(AnimeLabBaseIE):
    _VALID_URL = r'https?://(?:www\.)?animelab\.com/shows/(?P<id>[^/]+)'

    _TEST = {
        'url': 'https://www.animelab.com/shows/attack-on-titan',
        'info_dict': {
            'id': '45',
            'title': 'Attack on Titan',
            'description': 'md5:989d95a2677e9309368d5cf39ba91469',
        },
        'playlist_count': 59,
        'skip': 'All AnimeLab content requires authentication',
    }

    def _real_extract(self, url):
        _BASE_URL = 'http://www.animelab.com'
        _SHOWS_API_URL = '/api/videoentries/show/videos/'
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id, 'Downloading requested URL')

        show_data_str = self._search_regex(r'({"id":.*}),\svideoEntry', webpage, 'AnimeLab show data')
        show_data = self._parse_json(show_data_str, display_id)

        show_id = str_or_none(show_data.get('id'))
        title = show_data.get('name')
        description = show_data.get('shortSynopsis') or show_data.get('longSynopsis')

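        # Fetch each season's episode list from the JSON API and emit a url_result
        # per episode pointing at its player page.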
        entries = []
        for season in show_data['seasons']:
            season_id = season['id']
            get_data = urlencode_postdata({
                'seasonId': season_id,
                'limit': 1000,
            })
            # despite using urlencode_postdata, we are sending a GET request
            target_url = _BASE_URL + _SHOWS_API_URL + show_id + '?' + get_data.decode('utf-8')
            response = self._download_webpage(
                target_url,
                None, 'Season id %s' % season_id)

            season_data = self._parse_json(response, display_id)

            for video_data in season_data['list']:
                entries.append(self.url_result(
                    _BASE_URL + '/player/' + video_data['slug'], 'AnimeLab',
                    str_or_none(video_data.get('id')), video_data.get('name')))

        return {
            '_type': 'playlist',
            'id': show_id,
            'title': title,
            'description': description,
            'entries': entries,
        }

# TODO implement myqueue