[yt-dlp.git] / yt_dlp / extractor / crunchyroll.py

import base64
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    float_or_none,
    format_field,
    join_nonempty,
    parse_iso8601,
    qualities,
    traverse_obj,
    try_get,
)


class CrunchyrollBaseIE(InfoExtractor):
    """Login and API-parameter handling shared by the Crunchyroll extractors."""

    _LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
    # Cache for the (api_domain, bucket, query params) tuple built by _get_params().
    # It is always read and written through CrunchyrollBaseIE itself, so every
    # subclass shares the same cached value.
    params = None

    @property
    def is_logged_in(self):
        """Return the `etp_rt` cookie stored for the login URL (None if absent)."""
        return self._get_cookies(self._LOGIN_URL).get('etp_rt')

    def _perform_login(self, username, password):
        """Log in with the given credentials unless an `etp_rt` cookie already exists.

        Raises ExtractorError when no session id can be obtained, when the
        server does not answer the login request with code 'ok', or when the
        login did not leave an `etp_rt` cookie behind.
        """
        if self.is_logged_in:
            return

        upsell_response = self._download_json(
            f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
            query={
                'sess_id': 1,
                'device_id': 'whatvalueshouldbeforweb',
                'device_type': 'com.crunchyroll.static',
                'access_token': 'giKq5eY27ny3cqz',
                'referer': self._LOGIN_URL
            })
        # Tolerate a response without 'code'/'data' instead of dying with a KeyError
        session_id = traverse_obj(upsell_response, ('data', 'session_id'))
        if upsell_response.get('code') != 'ok' or not session_id:
            raise ExtractorError('Could not get session id')

        login_response = self._download_json(
            f'{self._API_BASE}/login.1.json', None, 'Logging in',
            data=urllib.parse.urlencode({
                'account': username,
                'password': password,
                'session_id': session_id
            }).encode('ascii'))
        if login_response.get('code') != 'ok':
            # .get() so that a missing 'message' cannot mask the login failure itself
            raise ExtractorError(
                'Login failed. Server message: %s' % login_response.get('message'), expected=True)
        if not self.is_logged_in:
            raise ExtractorError('Login succeeded but did not set etp_rt cookie')

    def _get_embedded_json(self, webpage, display_id):
        """Parse the `__INITIAL_STATE__` and `__APP_CONFIG__` objects found in *webpage*.

        Returns a tuple (initial_state, app_config).
        """
        initial_state = self._parse_json(self._search_regex(
            r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id)
        app_config = self._parse_json(self._search_regex(
            r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id)
        return initial_state, app_config

    def _get_params(self, lang):
        """Return the cached (api_domain, bucket, params) tuple, building it on first use.

        *lang* is the path fragment appended to https://www.crunchyroll.com/ when
        fetching the main page (it may be empty). The tuple is computed only once;
        later calls return the cached value regardless of *lang*.

        Raises ExtractorError when the index response carries no 'cms_web' entry.
        """
        if not CrunchyrollBaseIE.params:
            # The grant type and client id depend on whether an etp_rt cookie is present
            if self._get_cookies(f'https://www.crunchyroll.com/{lang}').get('etp_rt'):
                grant_type, key = 'etp_rt_cookie', 'accountAuthClientId'
            else:
                grant_type, key = 'client_id', 'anonClientId'

            initial_state, app_config = self._get_embedded_json(self._download_webpage(
                f'https://www.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
            api_domain = app_config['cxApiParams']['apiDomain'].replace('beta.crunchyroll.com', 'www.crunchyroll.com')

            auth_response = self._download_json(
                f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
                headers={
                    # HTTP Basic credentials: the client id as user name with an empty password
                    'Authorization': 'Basic ' + str(base64.b64encode(('%s:' % app_config['cxApiParams'][key]).encode('ascii')), 'ascii')
                }, data=f'grant_type={grant_type}'.encode('ascii'))
            policy_response = self._download_json(
                f'{api_domain}/index/v2', None, note='Retrieving signed policy',
                headers={
                    'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
                })
            cms = policy_response.get('cms_web')
            if not cms:
                # Without this guard the subscripts below fail with an opaque TypeError on None
                raise ExtractorError('Signed policy response did not contain "cms_web"')
            bucket = cms['bucket']
            params = {
                'Policy': cms['policy'],
                'Signature': cms['signature'],
                'Key-Pair-Id': cms['key_pair_id']
            }
            locale = traverse_obj(initial_state, ('localization', 'locale'))
            if locale:
                params['locale'] = locale
            CrunchyrollBaseIE.params = (api_domain, bucket, params)
        return CrunchyrollBaseIE.params


class CrunchyrollBetaIE(CrunchyrollBaseIE):
    """Extractor for single Crunchyroll `watch/<id>` pages."""

    IE_NAME = 'crunchyroll'
    _VALID_URL = r'''(?x)
        https?://(?:beta|www)\.crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        watch/(?P<id>\w+)
        (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
        'info_dict': {
            'id': 'GY2P1Q98Y',
            'ext': 'mp4',
            'duration': 1380.241,
            'timestamp': 1459632600,
            'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
            'title': 'World Trigger Episode 73 – To the Future',
            'upload_date': '20160402',
            'series': 'World Trigger',
            'series_id': 'GR757DMKY',
            'season': 'World Trigger',
            'season_id': 'GR9P39NJ6',
            'season_number': 1,
            'episode': 'To the Future',
            'episode_number': 73,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
            'chapters': 'count:2',
        },
        'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
    }, {
        'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
        'info_dict': {
            'id': 'GYE5WKQGR',
            'ext': 'mp4',
            'duration': 366.459,
            'timestamp': 1476788400,
            'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
            'title': 'SHELTER Episode  – Porter Robinson presents Shelter the Animation',
            'upload_date': '20161018',
            'series': 'SHELTER',
            'series_id': 'GYGG09WWY',
            'season': 'SHELTER',
            'season_id': 'GR09MGK4R',
            'season_number': 1,
            'episode': 'Porter Robinson presents Shelter the Animation',
            'episode_number': 0,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
            'chapters': 'count:0',
        },
        'params': {'skip_download': True},
        'skip': 'Video is Premium only',
    }, {
        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
        'only_matching': True,
    }, {
        'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict (metadata, formats, subtitles, chapters) for one episode.

        Raises ExtractorError (or asks for login) when the episode is flagged
        premium-only and the metadata carries no 'playback' entry.
        """
        lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
        api_domain, bucket, params = self._get_params(lang)

        episode_response = self._download_json(
            f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
            note='Retrieving episode metadata', query=params)
        if episode_response.get('is_premium_only') and not episode_response.get('playback'):
            if self.is_logged_in:
                raise ExtractorError('This video is for premium members only', expected=True)
            else:
                self.raise_login_required('This video is for premium members only')

        stream_response = self._download_json(
            f'{api_domain}{episode_response["__links__"]["streams"]["href"]}', display_id,
            note='Retrieving stream info', query=params)
        get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items()

        # 'none' on the command line stands for the stream without hardsubs (empty locale)
        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
        # Reversed so that hardsub languages listed first receive the highest quality value
        hardsub_preference = qualities(requested_hardsubs[::-1])
        requested_formats = self._configuration_arg('format') or ['adaptive_hls']

        # Keyed by (stream_type, hardsub_lang). Keying by the hardsub language alone
        # let every later stream type overwrite the earlier one whenever more than
        # one stream type was requested through the `format` extractor argument.
        available_formats = {}
        for stream_type, streams in get_streams('streams'):
            if stream_type not in requested_formats:
                continue
            for stream in streams.values():
                if not stream.get('url'):
                    continue
                hardsub_lang = stream.get('hardsub_locale') or ''
                format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
                available_formats[stream_type, hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])

        available_hardsubs = {hardsub_lang for _, hardsub_lang in available_formats}
        if '' in available_hardsubs and 'all' not in requested_hardsubs:
            # Only the requested hardsub languages get their HLS manifest downloaded;
            # every other language is represented by a single meta format below
            full_format_langs = set(requested_hardsubs)
            self.to_screen(
                'To get all formats of a hardsub language, use '
                '"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
                'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info',
                only_once=True)
        else:
            full_format_langs = set(map(str.lower, available_hardsubs))

        audio_locale = stream_response.get('audio_locale')
        formats = []
        for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
            if stream_type.endswith('hls'):
                if hardsub_lang.lower() in full_format_langs:
                    adaptive_formats = self._extract_m3u8_formats(
                        stream_url, display_id, 'mp4', m3u8_id=format_id,
                        fatal=False, note=f'Downloading {format_id} HLS manifest')
                else:
                    adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
            elif stream_type.endswith('dash'):
                adaptive_formats = self._extract_mpd_formats(
                    stream_url, display_id, mpd_id=format_id,
                    fatal=False, note=f'Downloading {format_id} MPD manifest')
            else:
                self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
                continue
            for f in adaptive_formats:
                if f.get('acodec') != 'none':
                    f['language'] = audio_locale
                f['quality'] = hardsub_preference(hardsub_lang.lower())
            formats.extend(adaptive_formats)

        chapters = None
        # if no intro chapter is available, a 403 without usable data is returned
        intro_chapter = self._download_json(f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json',
                                            display_id, fatal=False, errnote=False)
        if isinstance(intro_chapter, dict):
            chapters = [{
                'title': 'Intro',
                'start_time': float_or_none(intro_chapter.get('startTime')),
                'end_time': float_or_none(intro_chapter.get('endTime'))
            }]

        return {
            'id': internal_id,
            'title': '%s Episode %s – %s' % (
                episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
            'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
            'duration': float_or_none(episode_response.get('duration_ms'), 1000),
            'timestamp': parse_iso8601(episode_response.get('upload_date')),
            'series': episode_response.get('series_title'),
            'series_id': episode_response.get('series_id'),
            'season': episode_response.get('season_title'),
            'season_id': episode_response.get('season_id'),
            'season_number': episode_response.get('season_number'),
            'episode': episode_response.get('title'),
            'episode_number': episode_response.get('sequence_number'),
            'formats': formats,
            'thumbnails': [{
                'url': thumb.get('source'),
                'width': thumb.get('width'),
                'height': thumb.get('height'),
            } for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
            'subtitles': {
                sub_lang: [{
                    'url': subtitle_data.get('url'),
                    'ext': subtitle_data.get('format')
                }] for sub_lang, subtitle_data in get_streams('subtitles')
            },
            'chapters': chapters
        }


class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
    """Playlist extractor for Crunchyroll `series/<id>` pages."""

    IE_NAME = 'crunchyroll:playlist'
    _VALID_URL = r'''(?x)
        https?://(?:beta|www)\.crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        series/(?P<id>\w+)
        (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
        'info_dict': {
            'id': 'GY19NQ2QR',
            'title': 'Girl Friend BETA',
        },
        'playlist_mincount': 10,
    }, {
        'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist with one URL entry per episode of every season of the series."""
        mobj = self._match_valid_url(url)
        lang = mobj.group('lang')
        series_id = mobj.group('id')
        display_id = mobj.group('display_id')
        api_domain, bucket, params = self._get_params(lang)

        series_meta = self._download_json(
            f'{api_domain}/cms/v2{bucket}/series/{series_id}', display_id,
            note='Retrieving series metadata', query=params)

        season_list = self._download_json(
            f'{api_domain}/cms/v2{bucket}/seasons?series_id={series_id}', display_id,
            note='Retrieving season list', query=params)

        def build_entry(ep):
            # Turn one item of the episode listing into a url-type playlist entry
            ep_id = ep['id']
            ep_slug = ep['slug_title']
            title_parts = (ep.get('season_title'), ep.get('episode'), ep.get('title'))
            return {
                '_type': 'url',
                'url': f'https://www.crunchyroll.com/{lang}watch/{ep_id}/{ep_slug}',
                'ie_key': CrunchyrollBetaIE.ie_key(),
                'id': ep_id,
                'title': '%s Episode %s – %s' % title_parts,
                'description': try_get(ep, lambda x: x['description'].replace(r'\r\n', '\n')),
                'duration': float_or_none(ep.get('duration_ms'), 1000),
                'series': ep.get('series_title'),
                'series_id': ep.get('series_id'),
                'season': ep.get('season_title'),
                'season_id': ep.get('season_id'),
                'season_number': ep.get('season_number'),
                'episode': ep.get('title'),
                'episode_number': ep.get('sequence_number'),
                'language': ep.get('audio_locale'),
            }

        def iter_entries():
            # Lazy: the episode list of a season is only requested once iteration reaches it
            for season_item in season_list['items']:
                episode_list = self._download_json(
                    f'{api_domain}/cms/v2{bucket}/episodes?season_id={season_item["id"]}', display_id,
                    note=f'Retrieving episode list for {season_item.get("slug_title")}', query=params)
                yield from map(build_entry, episode_list['items'])

        return self.playlist_result(iter_entries(), series_id, series_meta.get('title'))
Commit	Line	Data
706dfe44	1	import base64
cb1553e9	2	import urllib.parse
ac668111	3
46279958	4	from .common import InfoExtractor
1cc79574 PH	5	from ..utils import (
1cc79574 PH	6	ExtractorError,
54a5be4d	7	float_or_none,
706dfe44	8	format_field,
706dfe44	9	join_nonempty,
b99ba3df	10	parse_iso8601,
a9d4da60	11	qualities,
706dfe44	12	traverse_obj,
245d43ca	13	try_get,
c8434e83	14	)
c8434e83	15
34440095	16
46279958	17	class CrunchyrollBaseIE(InfoExtractor):
7c74a015 JH	18	_LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
7c74a015 JH	19	_API_BASE = 'https://api.crunchyroll.com'
80f48920	20	_NETRC_MACHINE = 'crunchyroll'
cb1553e9	21	params = None
05dee6c5	22
44699d10	23	@property
	24	def is_logged_in(self):
	25	return self._get_cookies(self._LOGIN_URL).get('etp_rt')
	26
52efa4b3	27	def _perform_login(self, username, password):
44699d10	28	if self.is_logged_in:
eb5b1fc0 S	29	return
eb5b1fc0 S	30
7c74a015 JH	31	upsell_response = self._download_json(
	32	f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
	33	query={
	34	'sess_id': 1,
	35	'device_id': 'whatvalueshouldbeforweb',
	36	'device_type': 'com.crunchyroll.static',
	37	'access_token': 'giKq5eY27ny3cqz',
	38	'referer': self._LOGIN_URL
	39	})
	40	if upsell_response['code'] != 'ok':
	41	raise ExtractorError('Could not get session id')
	42	session_id = upsell_response['data']['session_id']
	43
	44	login_response = self._download_json(
	45	f'{self._API_BASE}/login.1.json', None, 'Logging in',
cb1553e9	46	data=urllib.parse.urlencode({
7c74a015 JH	47	'account': username,
	48	'password': password,
	49	'session_id': session_id
	50	}).encode('ascii'))
	51	if login_response['code'] != 'ok':
97bef011	52	raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
44699d10	53	if not self.is_logged_in:
7c74a015	54	raise ExtractorError('Login succeeded but did not set etp_rt cookie')
80f48920	55
cb1553e9	56	def _get_embedded_json(self, webpage, display_id):
f4d706a9 JH	57	initial_state = self._parse_json(self._search_regex(
	58	r'__INITIAL_STATE__\s=\s({.+?})\s*;', webpage, 'initial state'), display_id)
	59	app_config = self._parse_json(self._search_regex(
	60	r'__APP_CONFIG__\s=\s({.+?})\s*;', webpage, 'app config'), display_id)
	61	return initial_state, app_config
	62
f4d706a9	63	def _get_params(self, lang):
cb1553e9 JH	64	if not CrunchyrollBaseIE.params:
cb1553e9 JH	65	if self._get_cookies(f'https://www.crunchyroll.com/{lang}').get('etp_rt'):
459262ac JH	66	grant_type, key = 'etp_rt_cookie', 'accountAuthClientId'
	67	else:
	68	grant_type, key = 'client_id', 'anonClientId'
	69
cb1553e9 JH	70	initial_state, app_config = self._get_embedded_json(self._download_webpage(
	71	f'https://www.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
	72	api_domain = app_config['cxApiParams']['apiDomain'].replace('beta.crunchyroll.com', 'www.crunchyroll.com')
459262ac	73
f4d706a9	74	auth_response = self._download_json(
459262ac	75	f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
f4d706a9	76	headers={
459262ac JH	77	'Authorization': 'Basic ' + str(base64.b64encode(('%s:' % app_config['cxApiParams'][key]).encode('ascii')), 'ascii')
459262ac JH	78	}, data=f'grant_type={grant_type}'.encode('ascii'))
f4d706a9 JH	79	policy_response = self._download_json(
	80	f'{api_domain}/index/v2', None, note='Retrieving signed policy',
	81	headers={
	82	'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
	83	})
cb1553e9	84	cms = policy_response.get('cms_web')
88d62206	85	bucket = cms['bucket']
f4d706a9	86	params = {
88d62206 JH	87	'Policy': cms['policy'],
	88	'Signature': cms['signature'],
	89	'Key-Pair-Id': cms['key_pair_id']
f4d706a9 JH	90	}
	91	locale = traverse_obj(initial_state, ('localization', 'locale'))
	92	if locale:
	93	params['locale'] = locale
cb1553e9 JH	94	CrunchyrollBaseIE.params = (api_domain, bucket, params)
cb1553e9 JH	95	return CrunchyrollBaseIE.params
f4d706a9	96
f4d706a9	97
cb1553e9 JH	98	class CrunchyrollBetaIE(CrunchyrollBaseIE):
cb1553e9 JH	99	IE_NAME = 'crunchyroll'
5da42f2b	100	_VALID_URL = r'''(?x)
cb1553e9	101	https?://(?:beta\|www)\.crunchyroll\.com/
5da42f2b	102	(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
	103	watch/(?P<id>\w+)
	104	(?:/(?P<display_id>[\w-]+))?/?(?:[?#]\|$)'''
dd078970	105	_TESTS = [{
cb1553e9	106	'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
dd078970	107	'info_dict': {
b99ba3df	108	'id': 'GY2P1Q98Y',
dd078970	109	'ext': 'mp4',
b99ba3df JH	110	'duration': 1380.241,
b99ba3df JH	111	'timestamp': 1459632600,
dd078970	112	'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
dd078970	113	'title': 'World Trigger Episode 73 – To the Future',
dd078970	114	'upload_date': '20160402',
f4d706a9	115	'series': 'World Trigger',
b99ba3df	116	'series_id': 'GR757DMKY',
f4d706a9	117	'season': 'World Trigger',
b99ba3df	118	'season_id': 'GR9P39NJ6',
f4d706a9	119	'season_number': 1,
b99ba3df JH	120	'episode': 'To the Future',
b99ba3df JH	121	'episode_number': 73,
cb1553e9	122	'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
93abb740	123	'chapters': 'count:2',
dd078970	124	},
dfea94f8 SS	125	'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
dfea94f8 SS	126	}, {
cb1553e9	127	'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
dfea94f8 SS	128	'info_dict': {
	129	'id': 'GYE5WKQGR',
	130	'ext': 'mp4',
	131	'duration': 366.459,
	132	'timestamp': 1476788400,
	133	'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
	134	'title': 'SHELTER Episode – Porter Robinson presents Shelter the Animation',
	135	'upload_date': '20161018',
	136	'series': 'SHELTER',
	137	'series_id': 'GYGG09WWY',
	138	'season': 'SHELTER',
	139	'season_id': 'GR09MGK4R',
	140	'season_number': 1,
	141	'episode': 'Porter Robinson presents Shelter the Animation',
	142	'episode_number': 0,
cb1553e9	143	'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
93abb740	144	'chapters': 'count:0',
dfea94f8 SS	145	},
	146	'params': {'skip_download': True},
	147	'skip': 'Video is Premium only',
f4d706a9	148	}, {
cb1553e9	149	'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
f4d706a9	150	'only_matching': True,
964b5493	151	}, {
	152	'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
	153	'only_matching': True,
dd078970	154	}]
	155
	156	def _real_extract(self, url):
f4d706a9	157	lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
f4d706a9 JH	158	api_domain, bucket, params = self._get_params(lang)
f4d706a9 JH	159
706dfe44 JH	160	episode_response = self._download_json(
706dfe44 JH	161	f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
7d0f6f0c	162	note='Retrieving episode metadata', query=params)
706dfe44	163	if episode_response.get('is_premium_only') and not episode_response.get('playback'):
44699d10	164	if self.is_logged_in:
	165	raise ExtractorError('This video is for premium members only', expected=True)
	166	else:
	167	self.raise_login_required('This video is for premium members only')
706dfe44	168
f62f553d JH	169	stream_response = self._download_json(
	170	f'{api_domain}{episode_response["__links__"]["streams"]["href"]}', display_id,
	171	note='Retrieving stream info', query=params)
7d0f6f0c	172	get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items()
706dfe44 JH	173
	174	requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
	175	hardsub_preference = qualities(requested_hardsubs[::-1])
	176	requested_formats = self._configuration_arg('format') or ['adaptive_hls']
	177
dfea94f8	178	available_formats = {}
7d0f6f0c	179	for stream_type, streams in get_streams('streams'):
706dfe44 JH	180	if stream_type not in requested_formats:
	181	continue
	182	for stream in streams.values():
706dfe44 JH	183	if not stream.get('url'):
706dfe44 JH	184	continue
dfea94f8 SS	185	hardsub_lang = stream.get('hardsub_locale') or ''
	186	format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
	187	available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])
	188
	189	if '' in available_formats and 'all' not in requested_hardsubs:
	190	full_format_langs = set(requested_hardsubs)
	191	self.to_screen(
	192	'To get all formats of a hardsub language, use '
	193	'"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
08e29b9f	194	'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info',
dfea94f8 SS	195	only_once=True)
	196	else:
	197	full_format_langs = set(map(str.lower, available_formats))
	198
	199	formats = []
	200	for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
	201	if stream_type.endswith('hls'):
	202	if hardsub_lang.lower() in full_format_langs:
706dfe44	203	adaptive_formats = self._extract_m3u8_formats(
dfea94f8	204	stream_url, display_id, 'mp4', m3u8_id=format_id,
7d0f6f0c	205	fatal=False, note=f'Downloading {format_id} HLS manifest')
dfea94f8 SS	206	else:
	207	adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
	208	elif stream_type.endswith('dash'):
	209	adaptive_formats = self._extract_mpd_formats(
	210	stream_url, display_id, mpd_id=format_id,
	211	fatal=False, note=f'Downloading {format_id} MPD manifest')
	212	else:
	213	self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
	214	continue
	215	for f in adaptive_formats:
	216	if f.get('acodec') != 'none':
	217	f['language'] = stream_response.get('audio_locale')
	218	f['quality'] = hardsub_preference(hardsub_lang.lower())
	219	formats.extend(adaptive_formats)
706dfe44	220
93abb740 B	221	chapters = None
	222	# if no intro chapter is available, a 403 without usable data is returned
	223	intro_chapter = self._download_json(f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json',
	224	display_id, fatal=False, errnote=False)
	225	if isinstance(intro_chapter, dict):
	226	chapters = [{
	227	'title': 'Intro',
	228	'start_time': float_or_none(intro_chapter.get('startTime')),
	229	'end_time': float_or_none(intro_chapter.get('endTime'))
	230	}]
	231
706dfe44 JH	232	return {
706dfe44 JH	233	'id': internal_id,
7d0f6f0c B	234	'title': '%s Episode %s – %s' % (
	235	episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
	236	'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
706dfe44	237	'duration': float_or_none(episode_response.get('duration_ms'), 1000),
b99ba3df	238	'timestamp': parse_iso8601(episode_response.get('upload_date')),
706dfe44 JH	239	'series': episode_response.get('series_title'),
	240	'series_id': episode_response.get('series_id'),
	241	'season': episode_response.get('season_title'),
	242	'season_id': episode_response.get('season_id'),
	243	'season_number': episode_response.get('season_number'),
	244	'episode': episode_response.get('title'),
	245	'episode_number': episode_response.get('sequence_number'),
7d0f6f0c B	246	'formats': formats,
	247	'thumbnails': [{
	248	'url': thumb.get('source'),
	249	'width': thumb.get('width'),
	250	'height': thumb.get('height'),
	251	} for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
	252	'subtitles': {
	253	lang: [{
	254	'url': subtitle_data.get('url'),
	255	'ext': subtitle_data.get('format')
	256	}] for lang, subtitle_data in get_streams('subtitles')
	257	},
93abb740	258	'chapters': chapters
706dfe44	259	}
dd078970	260
dd078970	261
cb1553e9 JH	262	class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
cb1553e9 JH	263	IE_NAME = 'crunchyroll:playlist'
5da42f2b	264	_VALID_URL = r'''(?x)
cb1553e9	265	https?://(?:beta\|www)\.crunchyroll\.com/
5da42f2b	266	(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
	267	series/(?P<id>\w+)
	268	(?:/(?P<display_id>[\w-]+))?/?(?:[?#]\|$)'''
dd078970	269	_TESTS = [{
cb1553e9	270	'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
dd078970	271	'info_dict': {
b99ba3df	272	'id': 'GY19NQ2QR',
dd078970	273	'title': 'Girl Friend BETA',
	274	},
	275	'playlist_mincount': 10,
	276	}, {
5da42f2b	277	'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
dd078970	278	'only_matching': True,
	279	}]
	280
	281	def _real_extract(self, url):
f4d706a9	282	lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
f4d706a9 JH	283	api_domain, bucket, params = self._get_params(lang)
	284
	285	series_response = self._download_json(
	286	f'{api_domain}/cms/v2{bucket}/series/{internal_id}', display_id,
	287	note='Retrieving series metadata', query=params)
	288
	289	seasons_response = self._download_json(
	290	f'{api_domain}/cms/v2{bucket}/seasons?series_id={internal_id}', display_id,
	291	note='Retrieving season list', query=params)
	292
	293	def entries():
	294	for season in seasons_response['items']:
	295	episodes_response = self._download_json(
	296	f'{api_domain}/cms/v2{bucket}/episodes?season_id={season["id"]}', display_id,
	297	note=f'Retrieving episode list for {season.get("slug_title")}', query=params)
	298	for episode in episodes_response['items']:
	299	episode_id = episode['id']
	300	episode_display_id = episode['slug_title']
	301	yield {
	302	'_type': 'url',
cb1553e9	303	'url': f'https://www.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}',
f4d706a9 JH	304	'ie_key': CrunchyrollBetaIE.ie_key(),
	305	'id': episode_id,
	306	'title': '%s Episode %s – %s' % (episode.get('season_title'), episode.get('episode'), episode.get('title')),
	307	'description': try_get(episode, lambda x: x['description'].replace(r'\r\n', '\n')),
	308	'duration': float_or_none(episode.get('duration_ms'), 1000),
	309	'series': episode.get('series_title'),
	310	'series_id': episode.get('series_id'),
	311	'season': episode.get('season_title'),
	312	'season_id': episode.get('season_id'),
	313	'season_number': episode.get('season_number'),
	314	'episode': episode.get('title'),
8a6b1677 CF	315	'episode_number': episode.get('sequence_number'),
8a6b1677 CF	316	'language': episode.get('audio_locale'),
f4d706a9 JH	317	}
	318
	319	return self.playlist_result(entries(), internal_id, series_response.get('title'))