]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/crunchyroll.py
[ie/crunchyroll] Fix extractor (#9615)
[yt-dlp.git] / yt_dlp / extractor / crunchyroll.py
1 import base64
2 import uuid
3
4 from .common import InfoExtractor
5 from ..networking.exceptions import HTTPError
6 from ..utils import (
7 ExtractorError,
8 float_or_none,
9 format_field,
10 int_or_none,
11 jwt_decode_hs256,
12 parse_age_limit,
13 parse_count,
14 parse_iso8601,
15 qualities,
16 time_seconds,
17 traverse_obj,
18 url_or_none,
19 urlencode_postdata,
20 )
21
22
class CrunchyrollBaseIE(InfoExtractor):
    """Common base class of the Crunchyroll extractors.

    Bundles the login flow, bearer token handling, thin wrappers around the
    JSON API and the helpers that extract chapters and streams.
    """

    _BASE_URL = 'https://www.crunchyroll.com'
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
    # The auth state below is read from and written to CrunchyrollBaseIE
    # itself (see _update_auth), so every subclass sees the same values
    _AUTH_HEADERS = None
    # Path segment placed after /content/v2/ by _call_api; set by subclasses
    _API_ENDPOINT = None
    _BASIC_AUTH = None
    _IS_PREMIUM = None
    # Indexed with the is_logged_in boolean: [False] anonymous, [True] logged in
    _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
    # Language part of a URL (without slash) -> locale sent to the API
    _LOCALE_LOOKUP = {
        'ar': 'ar-SA',
        'de': 'de-DE',
        '': 'en-US',
        'es': 'es-419',
        'es-es': 'es-ES',
        'fr': 'fr-FR',
        'it': 'it-IT',
        'pt-br': 'pt-BR',
        'pt-pt': 'pt-PT',
        'ru': 'ru-RU',
        'hi': 'hi-IN',
    }

    @property
    def is_logged_in(self):
        """Whether an ``etp_rt`` cookie is present for the base URL."""
        return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))

    def _perform_login(self, username, password):
        """Log in with username and password unless already logged in.

        Raises ExtractorError if no session id could be obtained, if the
        server rejects the login, or if no ``etp_rt`` cookie was set.
        """
        if self.is_logged_in:
            return

        upsell_response = self._download_json(
            f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
            query={
                'sess_id': 1,
                'device_id': 'whatvalueshouldbeforweb',
                'device_type': 'com.crunchyroll.static',
                'access_token': 'giKq5eY27ny3cqz',
                'referer': f'{self._BASE_URL}/welcome/login'
            })
        if upsell_response['code'] != 'ok':
            raise ExtractorError('Could not get session id')
        session_id = upsell_response['data']['session_id']

        login_response = self._download_json(
            f'{self._API_BASE}/login.1.json', None, 'Logging in',
            data=urlencode_postdata({
                'account': username,
                'password': password,
                'session_id': session_id
            }))
        if login_response['code'] != 'ok':
            raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
        if not self.is_logged_in:
            raise ExtractorError('Login succeeded but did not set etp_rt cookie')

    def _update_auth(self):
        """Fetch a new bearer token unless the cached one is still valid.

        Stores the resulting ``Authorization`` header, the premium flag and
        the refresh time on CrunchyrollBaseIE.
        """
        # _AUTH_REFRESH is only assigned together with _AUTH_HEADERS (below),
        # so the short-circuit keeps it from being read before it exists
        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
            return

        if not CrunchyrollBaseIE._BASIC_AUTH:
            cx_api_param = self._CLIENT_ID[self.is_logged_in]
            self.write_debug(f'Using cxApiParam={cx_api_param}')
            CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()

        auth_headers = {'Authorization': CrunchyrollBaseIE._BASIC_AUTH}
        if self.is_logged_in:
            grant_type = 'etp_rt_cookie'
        else:
            grant_type = 'client_id'
            auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
        try:
            auth_response = self._download_json(
                f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
                headers=auth_headers, data=f'grant_type={grant_type}'.encode())
        except ExtractorError as error:
            if isinstance(error.cause, HTTPError) and error.cause.status == 403:
                raise ExtractorError(
                    'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
                    'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
                    'and your browser\'s User-Agent (with --user-agent)', expected=True)
            raise

        CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(auth_response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
        # Refresh 10 seconds before the reported expiry (300s if none is reported)
        CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)

    def _locale_from_language(self, language):
        """Return the API locale for the language part of a URL.

        A locale passed through the ``metadata`` extractor argument takes
        precedence. Returns None when the language has no known locale.
        """
        config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
        if config_locale:
            return config_locale[0]
        # The URL patterns in this file capture the language as 'de', as 'de/'
        # or not at all (None); normalize all of these to the lookup key form
        return self._LOCALE_LOOKUP.get((language or '').rstrip('/'))

    def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}):
        """Download JSON from ``endpoint`` on the base URL with the auth headers.

        A ``locale`` query parameter is added when one is known for ``lang``.
        """
        self._update_auth()

        if not endpoint.startswith('/'):
            endpoint = f'/{endpoint}'

        # Copy so that neither the caller's dict nor the shared default is mutated
        query = query.copy()
        locale = self._locale_from_language(lang)
        if locale:
            query['locale'] = locale

        return self._download_json(
            f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}',
            headers=CrunchyrollBaseIE._AUTH_HEADERS, query=query)

    def _call_api(self, path, internal_id, lang, note='api', query={}):
        """Call ``/content/v2/<_API_ENDPOINT>/<path>``.

        Returns None on HTTP 404; raises ExtractorError on an empty response.
        """
        if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'):
            path = f'/content/v2/{self._API_ENDPOINT}/{path}'

        try:
            result = self._call_base_api(
                path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query)
        except ExtractorError as error:
            if isinstance(error.cause, HTTPError) and error.cause.status == 404:
                return None
            raise

        if not result:
            raise ExtractorError(f'Unexpected response when downloading {note} JSON')
        return result

    def _extract_chapters(self, internal_id):
        """Build chapters from the skip events, or return None if there are none."""
        # if no skip events are available, a 403 xml error is returned
        skip_events = self._download_json(
            f'https://static.crunchyroll.com/skip-events/production/{internal_id}.json',
            internal_id, note='Downloading chapter info', fatal=False, errnote=False)
        if not skip_events:
            return None

        chapters = []
        for event in ('recap', 'intro', 'credits', 'preview'):
            start = traverse_obj(skip_events, (event, 'start', {float_or_none}))
            end = traverse_obj(skip_events, (event, 'end', {float_or_none}))
            # some chapters have no start and/or ending time, they will just be ignored
            if start is None or end is None:
                continue
            chapters.append({'title': event.capitalize(), 'start_time': start, 'end_time': end})

        return chapters

    def _extract_stream(self, identifier, display_id=None):
        """Return ``(formats, subtitles)`` for the given stream identifier.

        Hardsub variants are only extracted when requested through the
        ``hardsub`` extractor argument (``all`` selects every variant).
        """
        if not display_id:
            display_id = identifier

        self._update_auth()
        stream_response = self._download_json(
            f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
            display_id, note='Downloading stream info', headers=CrunchyrollBaseIE._AUTH_HEADERS)

        # hardsub language ('' for none) -> (format_id, hardsub language, manifest URL)
        available_formats = {'': ('', '', stream_response['url'])}
        for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
            available_formats[hardsub_lang] = (f'hardsub-{hardsub_lang}', hardsub_lang, stream['url'])

        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
        hardsub_langs = [lang for lang in available_formats if lang]
        if hardsub_langs and 'all' not in requested_hardsubs:
            full_format_langs = set(requested_hardsubs)
            self.to_screen(f'Available hardsub languages: {", ".join(hardsub_langs)}')
            self.to_screen(
                'To extract formats of a hardsub language, use '
                '"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
                'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info',
                only_once=True)
        else:
            full_format_langs = set(map(str.lower, available_formats))

        audio_locale = traverse_obj(stream_response, ('audioLocale', {str}))
        # Reversed so that hardsubs requested earlier get a higher quality value
        hardsub_preference = qualities(requested_hardsubs[::-1])
        formats, subtitles = [], {}
        for format_id, hardsub_lang, stream_url in available_formats.values():
            if hardsub_lang.lower() in full_format_langs:
                adaptive_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
                    stream_url, display_id, mpd_id=format_id, headers=CrunchyrollBaseIE._AUTH_HEADERS,
                    fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest')
                self._merge_subtitles(dash_subs, target=subtitles)
            else:
                continue  # XXX: Update this if/when meta mpd formats are working
            for f in adaptive_formats:
                if f.get('acodec') != 'none':
                    f['language'] = audio_locale
                f['quality'] = hardsub_preference(hardsub_lang.lower())
            formats.extend(adaptive_formats)

        for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)):
            subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'}))

        return formats, subtitles
211
212
class CrunchyrollCmsBaseIE(CrunchyrollBaseIE):
    """Base class of the extractors that call the signed ``/cms/v2`` API."""

    _API_ENDPOINT = 'cms'
    _CMS_EXPIRY = None

    def _call_cms_api_signed(self, path, internal_id, lang, note='api'):
        """Call a ``/cms/v2`` endpoint with the signed policy as query.

        The policy, bucket and expiry are kept on CrunchyrollCmsBaseIE and
        are fetched again once the stored expiry has been reached.
        """
        cms = CrunchyrollCmsBaseIE

        policy_is_valid = bool(cms._CMS_EXPIRY) and cms._CMS_EXPIRY > time_seconds()
        if not policy_is_valid:
            cms_web = self._call_base_api('index/v2', None, lang, 'Retrieving signed policy')['cms_web']
            signed_query = {
                param: cms_web[field]
                for param, field in (
                    ('Policy', 'policy'),
                    ('Signature', 'signature'),
                    ('Key-Pair-Id', 'key_pair_id'),
                )
            }
            cms._CMS_QUERY = signed_query
            cms._CMS_BUCKET = cms_web['bucket']
            # Treat the policy as expired 10 seconds ahead of the reported time
            cms._CMS_EXPIRY = parse_iso8601(cms_web['expires']) - 10

        signed_path = path if path.startswith('/cms/v2') else f'/cms/v2{cms._CMS_BUCKET}/{path}'
        download_note = f'Downloading {note} JSON (signed cms)'
        return self._call_base_api(signed_path, internal_id, lang, download_note, query=cms._CMS_QUERY)
233
234
class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
    """Extractor for ``/watch/<id>`` pages: episodes, movies and movie listings."""

    IE_NAME = 'crunchyroll'
    _VALID_URL = r'''(?x)
        https?://(?:beta\.|www\.)?crunchyroll\.com/
        (?:(?P<lang>\w{2}(?:-\w{2})?)/)?
        watch/(?!concert|musicvideo)(?P<id>\w+)'''
    _TESTS = [{
        # Premium only
        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
        'info_dict': {
            'id': 'GY2P1Q98Y',
            'ext': 'mp4',
            'duration': 1380.241,
            'timestamp': 1459632600,
            'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
            'title': 'World Trigger Episode 73 – To the Future',
            'upload_date': '20160402',
            'series': 'World Trigger',
            'series_id': 'GR757DMKY',
            'season': 'World Trigger',
            'season_id': 'GR9P39NJ6',
            'season_number': 1,
            'episode': 'To the Future',
            'episode_number': 73,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'chapters': 'count:2',
            'age_limit': 14,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': 'm3u8',
            'extractor_args': {'crunchyrollbeta': {'hardsub': ['de-DE']}},
            'format': 'bv[format_id~=hardsub]',
        },
    }, {
        # Premium only
        'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
        'info_dict': {
            'id': 'GYE5WKQGR',
            'ext': 'mp4',
            'duration': 366.459,
            'timestamp': 1476788400,
            'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
            'title': 'SHELTER – Porter Robinson presents Shelter the Animation',
            'upload_date': '20161018',
            'series': 'SHELTER',
            'series_id': 'GYGG09WWY',
            'season': 'SHELTER',
            'season_id': 'GR09MGK4R',
            'season_number': 1,
            'episode': 'Porter Robinson presents Shelter the Animation',
            'episode_number': 0,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'age_limit': 14,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://www.crunchyroll.com/watch/GJWU2VKK3/cherry-blossom-meeting-and-a-coming-blizzard',
        'info_dict': {
            'id': 'GJWU2VKK3',
            'ext': 'mp4',
            'duration': 1420.054,
            'description': 'md5:2d1c67c0ec6ae514d9c30b0b99a625cd',
            'title': 'The Ice Guy and His Cool Female Colleague Episode 1 – Cherry Blossom Meeting and a Coming Blizzard',
            'series': 'The Ice Guy and His Cool Female Colleague',
            'series_id': 'GW4HM75NP',
            'season': 'The Ice Guy and His Cool Female Colleague',
            'season_id': 'GY9PC21VE',
            'season_number': 1,
            'episode': 'Cherry Blossom Meeting and a Coming Blizzard',
            'episode_number': 1,
            'chapters': 'count:2',
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'timestamp': 1672839000,
            'upload_date': '20230104',
            'age_limit': 14,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.crunchyroll.com/watch/GM8F313NQ',
        'info_dict': {
            'id': 'GM8F313NQ',
            'ext': 'mp4',
            'title': 'Garakowa -Restore the World-',
            'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
            'duration': 3996.104,
            'age_limit': 13,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
        },
        'params': {'skip_download': 'm3u8'},
        'skip': 'no longer exists',
    }, {
        'url': 'https://www.crunchyroll.com/watch/G62PEZ2E6',
        'info_dict': {
            'id': 'G62PEZ2E6',
            'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
            'age_limit': 13,
            'duration': 65.138,
            'title': 'Garakowa -Restore the World-',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y',
        'only_matching': True,
    }, {
        'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
        'only_matching': True,
    }]
    # We want to support lazy playlist filtering and movie listings cannot be inside a playlist
    _RETURN_TYPE = 'video'

    def _real_extract(self, url):
        """Extract an episode or movie, or a playlist for a movie listing.

        Raises ExtractorError when the object cannot be found, has an unknown
        type, or is premium only while the account is not premium.
        """
        lang, internal_id = self._match_valid_url(url).group('lang', 'id')
        # The `lang` group excludes the trailing slash and is None when the URL
        # has no language part, so it cannot be interpolated into URLs as-is
        lang_prefix = f'{lang}/' if lang else ''

        # We need to use unsigned API call to allow ratings query string
        response = traverse_obj(self._call_api(
            f'objects/{internal_id}', internal_id, lang, 'object info', {'ratings': 'true'}), ('data', 0, {dict}))
        if not response:
            raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)

        object_type = response.get('type')
        if object_type == 'episode':
            result = self._transform_episode_response(response)

        elif object_type == 'movie':
            result = self._transform_movie_response(response)

        elif object_type == 'movie_listing':
            first_movie_id = traverse_obj(response, ('movie_listing_metadata', 'first_movie_id'))
            if not self._yes_playlist(internal_id, first_movie_id):
                return self.url_result(
                    f'{self._BASE_URL}/{lang_prefix}watch/{first_movie_id}', CrunchyrollBetaIE, first_movie_id)

            def entries():
                movies = self._call_api(f'movie_listings/{internal_id}/movies', internal_id, lang, 'movie list')
                for movie_response in traverse_obj(movies, ('data', ...)):
                    yield self.url_result(
                        f'{self._BASE_URL}/{lang_prefix}watch/{movie_response["id"]}',
                        CrunchyrollBetaIE, **self._transform_movie_response(movie_response))

            return self.playlist_result(entries(), **self._transform_movie_response(response))

        else:
            raise ExtractorError(f'Unknown object type {object_type}')

        if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
            message = f'This {object_type} is for premium members only'
            if self.is_logged_in:
                raise ExtractorError(message, expected=True)
            self.raise_login_required(message)

        result['formats'], result['subtitles'] = self._extract_stream(internal_id)

        result['chapters'] = self._extract_chapters(internal_id)

        def calculate_count(item):
            # The displayed number and its optional unit are parsed as one string
            return parse_count(''.join((item['displayed'], item.get('unit') or '')))

        result.update(traverse_obj(response, ('rating', {
            'like_count': ('up', {calculate_count}),
            'dislike_count': ('down', {calculate_count}),
        })))

        return result

    @staticmethod
    def _transform_episode_response(data):
        """Map an episode object of the API to info dict fields.

        The metadata is read from ``episode_metadata`` when that is a dict,
        otherwise from ``data`` itself.
        """
        metadata = traverse_obj(data, (('episode_metadata', None), {dict}), get_all=False) or {}
        return {
            'id': data['id'],
            'title': ' \u2013 '.join((
                ('%s%s' % (
                    format_field(metadata, 'season_title'),
                    format_field(metadata, 'episode', ' Episode %s'))),
                format_field(data, 'title'))),
            **traverse_obj(data, {
                'episode': ('title', {str}),
                'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
                'thumbnails': ('images', 'thumbnail', ..., ..., {
                    'url': ('source', {url_or_none}),
                    'width': ('width', {int_or_none}),
                    'height': ('height', {int_or_none}),
                }),
            }),
            **traverse_obj(metadata, {
                'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
                'timestamp': ('upload_date', {parse_iso8601}),
                'series': ('series_title', {str}),
                'series_id': ('series_id', {str}),
                'season': ('season_title', {str}),
                'season_id': ('season_id', {str}),
                'season_number': ('season_number', ({int}, {float_or_none})),
                'episode_number': ('sequence_number', ({int}, {float_or_none})),
                'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
                'language': ('audio_locale', {str}),
            }, get_all=False),
        }

    @staticmethod
    def _transform_movie_response(data):
        """Map a movie or movie listing object of the API to info dict fields.

        The metadata is read from ``movie_metadata`` or
        ``movie_listing_metadata`` when one of them is a dict, otherwise from
        ``data`` itself.
        """
        metadata = traverse_obj(data, (('movie_metadata', 'movie_listing_metadata', None), {dict}), get_all=False) or {}
        return {
            'id': data['id'],
            **traverse_obj(data, {
                'title': ('title', {str}),
                'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
                'thumbnails': ('images', 'thumbnail', ..., ..., {
                    'url': ('source', {url_or_none}),
                    'width': ('width', {int_or_none}),
                    'height': ('height', {int_or_none}),
                }),
            }),
            **traverse_obj(metadata, {
                'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
                'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
            }),
        }
456
457
class CrunchyrollBetaShowIE(CrunchyrollCmsBaseIE):
    """Extractor for ``/series/<id>`` pages; yields every episode of every season."""

    IE_NAME = 'crunchyroll:playlist'
    _VALID_URL = r'''(?x)
        https?://(?:beta\.|www\.)?crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        series/(?P<id>\w+)'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
        'info_dict': {
            'id': 'GY19NQ2QR',
            'title': 'Girl Friend BETA',
            'description': 'md5:99c1b22ee30a74b536a8277ced8eb750',
            # XXX: `thumbnail` does not get set from `thumbnails` in playlist
            # 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'age_limit': 14,
        },
        'playlist_mincount': 10,
    }, {
        'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist whose entries are fetched lazily, season by season."""
        # Here the `lang` group carries its trailing slash (or is empty)
        lang, internal_id = self._match_valid_url(url).group('lang', 'id')

        def entries():
            seasons = self._call_cms_api_signed(
                f'seasons?series_id={internal_id}', internal_id, lang, 'seasons')
            for season in traverse_obj(seasons, ('items', ..., {dict})):
                season_id = season['id']
                episodes = self._call_cms_api_signed(
                    f'episodes?season_id={season_id}', season_id, lang, 'episode list')
                for episode in traverse_obj(episodes, ('items', ..., {dict})):
                    episode_url = f'{self._BASE_URL}/{lang}watch/{episode["id"]}'
                    episode_info = CrunchyrollBetaIE._transform_episode_response(episode)
                    yield self.url_result(episode_url, CrunchyrollBetaIE, **episode_info)

        thumbnail_fields = {
            'url': ('source', {url_or_none}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
        }
        series_response = self._call_api(f'series/{internal_id}', internal_id, lang, 'series')
        series_info = traverse_obj(series_response, ('data', 0, {
            'title': ('title', {str}),
            'description': ('description', {lambda x: x.replace(r'\r\n', '\n')}),
            'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
            'thumbnails': ('images', ..., ..., ..., thumbnail_fields),
        }))

        return self.playlist_result(entries(), internal_id, **series_info)
505
506
class CrunchyrollMusicIE(CrunchyrollBaseIE):
    """Extractor for ``/watch/concert/<id>`` and ``/watch/musicvideo/<id>`` pages."""

    IE_NAME = 'crunchyroll:music'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        watch/(?P<type>concert|musicvideo)/(?P<id>\w+)'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79',
        'info_dict': {
            'ext': 'mp4',
            'id': 'MV5B02C79',
            'display_id': 'egaono-hana',
            'title': 'Egaono Hana',
            'track': 'Egaono Hana',
            'artists': ['Goose house'],
            'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'genres': ['J-Pop'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C',
        'info_dict': {
            'ext': 'mp4',
            'id': 'MV88BB7F2C',
            'display_id': 'crossing-field',
            'title': 'Crossing Field',
            'track': 'Crossing Field',
            'artists': ['LiSA'],
            'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'genres': ['Anime'],
        },
        'params': {'skip_download': 'm3u8'},
        'skip': 'no longer exists',
    }, {
        'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135',
        'info_dict': {
            'ext': 'mp4',
            'id': 'MC2E2AC135',
            'display_id': 'live-is-smile-always-364joker-at-yokohama-arena',
            'title': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
            'track': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
            'artists': ['LiSA'],
            'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'description': 'md5:747444e7e6300907b7a43f0a0503072e',
            'genres': ['J-Pop'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana',
        'only_matching': True,
    }, {
        'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena',
        'only_matching': True,
    }, {
        'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
        'only_matching': True,
    }]
    _API_ENDPOINT = 'music'

    def _real_extract(self, url):
        """Extract a single concert or music video.

        Raises ExtractorError when the object cannot be found or is premium
        only while the account is not premium.
        """
        lang, internal_id, object_type = self._match_valid_url(url).group('lang', 'id', 'type')

        # URL type -> (API collection, label used in the download note)
        api_targets = {
            'concert': ('concerts', 'concert info'),
            'musicvideo': ('music_videos', 'music video info'),
        }
        path, name = api_targets[object_type]

        api_response = self._call_api(f'{path}/{internal_id}', internal_id, lang, name)
        response = traverse_obj(api_response, ('data', 0, {dict}))
        if not response:
            raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)

        if response.get('isPremiumOnly') and not self._IS_PREMIUM:
            message = f'This {response.get("type") or "media"} is for premium members only'
            if self.is_logged_in:
                raise ExtractorError(message, expected=True)
            self.raise_login_required(message)

        info = self._transform_music_response(response)
        # Only the formats are used; the returned subtitles are discarded
        formats, _ = self._extract_stream(f'music/{internal_id}', internal_id)
        info['formats'] = formats
        return info

    @staticmethod
    def _transform_music_response(data):
        """Map a concert or music video object of the API to info dict fields."""
        media_id = data['id']
        thumbnail_fields = {
            'url': ('source', {url_or_none}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
        }
        fields = traverse_obj(data, {
            'display_id': 'slug',
            'title': 'title',
            'track': 'title',
            'artists': ('artist', 'name', all),
            'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n') or None}),
            'thumbnails': ('images', ..., ..., thumbnail_fields),
            'genres': ('genres', ..., 'displayValue'),
            'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
        })
        return {'id': media_id, **fields}
606
607
class CrunchyrollArtistIE(CrunchyrollBaseIE):
    """Extractor for ``/artist/<id>`` pages; yields the artist's concerts and music videos."""

    IE_NAME = 'crunchyroll:artist'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        artist/(?P<id>\w{10})'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/artist/MA179CB50D',
        'info_dict': {
            'id': 'MA179CB50D',
            'title': 'LiSA',
            'genres': ['Anime', 'J-Pop', 'Rock'],
            'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
        },
        'playlist_mincount': 83,
    }, {
        'url': 'https://www.crunchyroll.com/artist/MA179CB50D/lisa',
        'only_matching': True,
    }]
    _API_ENDPOINT = 'music'

    def _real_extract(self, url):
        """Return a playlist of the artist's concerts and music videos.

        Raises ExtractorError when no artist object is returned by the API.
        """
        lang, internal_id = self._match_valid_url(url).group('lang', 'id')
        response = traverse_obj(self._call_api(
            f'artists/{internal_id}', internal_id, lang, 'artist info'), ('data', 0, {dict}))
        # _call_api returns None on HTTP 404; fail with a readable message
        # instead of a TypeError when transforming the missing response
        if not response:
            raise ExtractorError(f'No artist with id {internal_id} could be found (possibly region locked?)', expected=True)

        def entries():
            for attribute, path in [('concerts', 'concert'), ('videos', 'musicvideo')]:
                for media_id in traverse_obj(response, (attribute, ...)):
                    yield self.url_result(f'{self._BASE_URL}/watch/{path}/{media_id}', CrunchyrollMusicIE, media_id)

        return self.playlist_result(entries(), **self._transform_artist_response(response))

    @staticmethod
    def _transform_artist_response(data):
        """Map an artist object of the API to playlist info dict fields."""
        return {
            'id': data['id'],
            **traverse_obj(data, {
                'title': 'name',
                'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
                'thumbnails': ('images', ..., ..., {
                    'url': ('source', {url_or_none}),
                    'width': ('width', {int_or_none}),
                    'height': ('height', {int_or_none}),
                }),
                'genres': ('genres', ..., 'displayValue'),
            }),
        }
653 'genres': ('genres', ..., 'displayValue'),
654 }),
655 }