]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/animeondemand.py
[youtube] Prefer UTC upload date for videos (#2223)
[yt-dlp.git] / yt_dlp / extractor / animeondemand.py
CommitLineData
e2bd68c9
S
1from __future__ import unicode_literals
2
3import re
4
5from .common import InfoExtractor
2f483758 6from ..compat import compat_str
e2bd68c9
S
7from ..utils import (
8 determine_ext,
3c5d183c 9 extract_attributes,
e2bd68c9 10 ExtractorError,
34921b43 11 join_nonempty,
3052a30d 12 url_or_none,
e2bd68c9 13 urlencode_postdata,
2f483758 14 urljoin,
e2bd68c9
S
15)
16
17
class AnimeOnDemandIE(InfoExtractor):
    """Extractor for anime pages on anime-on-demand.de.

    A page is returned as a playlist.  Each entry is either an episode
    (one per "episodebox" found on the page) or, when no episode boxes are
    found, the page itself treated as a single film.  When an episode/film
    has no playable formats, its teaser/trailer link (if any) is returned
    instead.
    """

    _VALID_URL = r'https?://(?:www\.)?anime-on-demand\.de/anime/(?P<id>\d+)'
    _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
    _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
    _NETRC_MACHINE = 'animeondemand'
    # German-speaking countries of Europe
    _GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
    _TESTS = [{
        # jap, OmU
        'url': 'https://www.anime-on-demand.de/anime/161',
        'info_dict': {
            'id': '161',
            'title': 'Grimgar, Ashes and Illusions (OmU)',
            'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
        },
        'playlist_mincount': 4,
    }, {
        # Film wording is used instead of Episode, ger/jap, Dub/OmU
        'url': 'https://www.anime-on-demand.de/anime/39',
        'only_matching': True,
    }, {
        # Episodes without titles, jap, OmU
        'url': 'https://www.anime-on-demand.de/anime/162',
        'only_matching': True,
    }, {
        # ger/jap, Dub/OmU, account required
        'url': 'https://www.anime-on-demand.de/anime/169',
        'only_matching': True,
    }, {
        # Full length film, non-series, ger/jap, Dub/OmU, account required
        'url': 'https://www.anime-on-demand.de/anime/185',
        'only_matching': True,
    }, {
        # Flash videos
        'url': 'https://www.anime-on-demand.de/anime/12',
        'only_matching': True,
    }]

    def _login(self):
        """Log in with the configured credentials, if any.

        Does nothing when no username is configured.  Raises a
        geo-restriction error when the login page shows the licensing
        notice, and ExtractorError when the response to the login POST
        contains neither of the logout markers.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page:
            self.raise_geo_restricted(
                '%s is only available in German-speaking countries of Europe' % self.IE_NAME)

        # Start from the form's hidden inputs and add the credentials
        login_form = self._form_hidden_inputs('new_user', login_page)

        login_form.update({
            'user[login]': username,
            'user[password]': password,
        })

        # Fall back to the login URL itself when the form has no action
        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
            'post url', default=self._LOGIN_URL, group='url')

        if not post_url.startswith('http'):
            post_url = urljoin(self._LOGIN_URL, post_url)

        response = self._download_webpage(
            post_url, None, 'Logging in',
            data=urlencode_postdata(login_form), headers={
                'Referer': self._LOGIN_URL,
            })

        # Neither logout marker present: treat the login as failed
        if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
            error = self._search_regex(
                r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
                response, 'error', default=None, group='error')
            if error:
                raise ExtractorError('Unable to login: %s' % error, expected=True)
            raise ExtractorError('Unable to log in')

    def _real_initialize(self):
        """Attempt the login once during extractor initialization."""
        self._login()

    def _real_extract(self, url):
        """Return a playlist result with the episodes/film found at `url`."""
        anime_id = self._match_id(url)

        webpage = self._download_webpage(url, anime_id)

        # Without any data-playlist attribute, request the HTML5 player
        # and fetch the page again
        if 'data-playlist=' not in webpage:
            self._download_webpage(
                self._APPLY_HTML5_URL, anime_id,
                'Activating HTML5 beta', 'Unable to apply HTML5 beta')
            webpage = self._download_webpage(url, anime_id)

        csrf_token = self._html_search_meta(
            'csrf-token', webpage, 'csrf token', fatal=True)

        anime_title = self._html_search_regex(
            r'(?s)<h1[^>]+itemprop="name"[^>]*>(.+?)</h1>',
            webpage, 'anime name')
        anime_description = self._html_search_regex(
            r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
            webpage, 'anime description', default=None)

        def extract_info(html, video_id, num=None):
            """Collect title, description and formats from `html`.

            Every "streamstarter" <input> is inspected; each playlist URL
            found in its data attributes is fetched as JSON and turned
            into either an RTMP format or HLS formats.  `num` is only used
            to label the progress note when neither a language nor a media
            kind is known.

            Returns a dict with the keys 'title', 'description' and
            'formats' ('formats' may be an empty list).
            """
            title = description = None
            formats = []

            for input_ in re.findall(
                    r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
                attributes = extract_attributes(input_)
                # Do not discard an already known title when this input
                # carries no dialog header
                title = attributes.get('data-dialog-header') or title
                playlist_urls = []
                for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
                    playlist_url = attributes.get(playlist_key)
                    if isinstance(playlist_url, compat_str) and re.match(
                            r'/?[\da-zA-Z]+', playlist_url):
                        playlist_urls.append(playlist_url)
                if not playlist_urls:
                    continue

                lang = attributes.get('data-lang')
                lang_note = attributes.get('value')

                for playlist_url in playlist_urls:
                    kind = self._search_regex(
                        r'videomaterialurl/\d+/([^/]+)/',
                        playlist_url, 'media kind', default=None)
                    # Label for the progress note only.  `num` is a
                    # fallback and must not be stringified when it is
                    # None (that used to produce the text "None").
                    format_id = join_nonempty(lang, kind)
                    if not format_id and num is not None:
                        format_id = str(num)
                    format_note = join_nonempty(kind, lang_note, delim=', ')
                    playlist_json = self._download_json(
                        urljoin(url, playlist_url), video_id,
                        'Downloading %s JSON' % join_nonempty(
                            format_id, 'videomaterial', delim=' '),
                        headers={
                            'X-Requested-With': 'XMLHttpRequest',
                            'X-CSRF-Token': csrf_token,
                            'Referer': url,
                            'Accept': 'application/json, text/javascript, */*; q=0.01',
                        }, fatal=False)
                    # The download is non-fatal, so tolerate a failed
                    # request as well as an unexpected JSON shape
                    if not playlist_json or not isinstance(playlist_json, dict):
                        continue
                    stream_url = url_or_none(playlist_json.get('streamurl'))
                    if stream_url:
                        rtmp = re.search(
                            r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
                            stream_url)
                        if rtmp:
                            formats.append({
                                'url': rtmp.group('url'),
                                'app': rtmp.group('app'),
                                'play_path': rtmp.group('playpath'),
                                'page_url': url,
                                'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
                                'rtmp_real_time': True,
                                'format_id': 'rtmp',
                                'ext': 'flv',
                            })
                            continue
                    start_video = playlist_json.get('startvideo', 0)
                    items = playlist_json.get('playlist')
                    if not items or not isinstance(items, list):
                        continue
                    # 'startvideo' comes from the server: skip this
                    # playlist instead of crashing when it is not a valid
                    # index into the item list
                    try:
                        item = items[start_video]
                    except (IndexError, TypeError):
                        continue
                    if not isinstance(item, dict):
                        continue
                    item_title = item.get('title')
                    if not item_title:
                        # Untitled items are skipped, but must not wipe a
                        # title that was already found
                        continue
                    title = item_title
                    description = item.get('description')
                    for source in item.get('sources') or []:
                        if not isinstance(source, dict):
                            continue
                        file_ = source.get('file')
                        if not file_:
                            continue
                        # Only HLS sources are extracted.  DASH
                        # ('video/dash' or .mpd) and every other source
                        # type are skipped.
                        if determine_ext(file_) != 'm3u8':
                            continue
                        file_formats = self._extract_m3u8_formats(
                            file_, video_id, 'mp4',
                            entry_protocol='m3u8_native',
                            m3u8_id=join_nonempty(lang, kind, 'hls'),
                            fatal=False)
                        for f in file_formats:
                            f.update({
                                'language': lang,
                                'format_note': format_note,
                            })
                        formats.extend(file_formats)

            return {
                'title': title,
                'description': description,
                'formats': formats,
            }

        def extract_entries(html, video_id, common_info, num=None):
            """Yield the entry for `html`, or its teaser/trailer.

            `common_info` supplies the base fields; the values produced by
            extract_info() are applied on top of a copy of it.
            """
            info = extract_info(html, video_id, num)

            if info['formats']:
                self._sort_formats(info['formats'])
                f = common_info.copy()
                f.update(info)
                yield f

            # Extract teaser/trailer only when full episode is not available
            if not info['formats']:
                m = re.search(
                    r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
                    html)
                if m:
                    f = common_info.copy()
                    f.update({
                        'id': '%s-%s' % (f['id'], m.group('kind').lower()),
                        'title': m.group('title'),
                        'url': urljoin(url, m.group('href')),
                    })
                    yield f

        def extract_episodes(html):
            """Yield entries for every titled episode box found in `html`."""
            for num, episode_html in enumerate(re.findall(
                    r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
                episodebox_title = self._search_regex(
                    (r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
                     r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
                    episode_html, 'episodebox title', default=None, group='title')
                if not episodebox_title:
                    continue

                # Fall back to the position on the page when the box
                # title carries no "Episode N" / "Film N" number
                episode_number = int(self._search_regex(
                    r'(?:Episode|Film)\s*(\d+)',
                    episodebox_title, 'episode number', default=num))
                episode_title = self._search_regex(
                    r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
                    episodebox_title, 'episode title', default=None)

                video_id = 'episode-%d' % episode_number

                common_info = {
                    'id': video_id,
                    'series': anime_title,
                    'episode': episode_title,
                    'episode_number': episode_number,
                }

                for e in extract_entries(episode_html, video_id, common_info):
                    yield e

        def extract_film(html, video_id):
            """Yield entries for `html` treated as a single film."""
            common_info = {
                'id': anime_id,
                'title': anime_title,
                'description': anime_description,
            }
            for e in extract_entries(html, video_id, common_info):
                yield e

        def entries():
            """Yield episode entries, or film entries if there are none."""
            has_episodes = False
            for e in extract_episodes(webpage):
                has_episodes = True
                yield e

            if not has_episodes:
                for e in extract_film(webpage, anime_id):
                    yield e

        return self.playlist_result(
            entries(), anime_id, anime_title, anime_description)