[yt-dlp.git] / yt_dlp / extractor / crunchyroll.py

import base64
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    float_or_none,
    format_field,
    join_nonempty,
    parse_iso8601,
    qualities,
    traverse_obj,
    try_get,
)


class CrunchyrollBaseIE(InfoExtractor):
    """Shared login and API-credential plumbing for the Crunchyroll extractors."""
    _LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
    # Class-level cache of the (api_domain, bucket, params) tuple built by _get_params()
    params = None

    def _perform_login(self, username, password):
        """Log in with *username*/*password* unless an ``etp_rt`` cookie is already present.

        Raises ExtractorError if no session id can be obtained, if the login
        endpoint does not answer with code ``ok``, or if the ``etp_rt`` cookie
        is still missing after a login that reported success.
        """
        if self._get_cookies(self._LOGIN_URL).get('etp_rt'):
            return

        # A session id has to be requested first; it is sent along with the credentials below
        upsell_response = self._download_json(
            f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
            query={
                'sess_id': 1,
                'device_id': 'whatvalueshouldbeforweb',
                'device_type': 'com.crunchyroll.static',
                'access_token': 'giKq5eY27ny3cqz',
                'referer': self._LOGIN_URL
            })
        if upsell_response['code'] != 'ok':
            raise ExtractorError('Could not get session id')
        session_id = upsell_response['data']['session_id']

        login_response = self._download_json(
            f'{self._API_BASE}/login.1.json', None, 'Logging in',
            data=urllib.parse.urlencode({
                'account': username,
                'password': password,
                'session_id': session_id
            }).encode('ascii'))
        if login_response['code'] != 'ok':
            raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
        if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
            raise ExtractorError('Login succeeded but did not set etp_rt cookie')

    def _get_embedded_json(self, webpage, display_id):
        """Return ``(initial_state, app_config)`` parsed from the JSON objects
        assigned to ``__INITIAL_STATE__`` and ``__APP_CONFIG__`` in *webpage*."""
        initial_state = self._parse_json(self._search_regex(
            r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id)
        app_config = self._parse_json(self._search_regex(
            r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id)
        return initial_state, app_config

    def _get_params(self, lang):
        """Return the ``(api_domain, bucket, params)`` tuple used for CMS requests.

        *lang* is the (possibly empty) language path prefix of the requested URL.
        *params* holds the signed policy values (``Policy``, ``Signature``,
        ``Key-Pair-Id``) plus ``locale`` when the main page exposes one.

        NOTE: the result is cached on ``CrunchyrollBaseIE.params`` and the cache is
        not keyed by *lang*, so the first computed tuple (including its locale)
        is reused by every later call.

        Raises ExtractorError if the policy response has no ``cms_web`` data.
        """
        if not CrunchyrollBaseIE.params:
            # Authenticate as the logged-in account when the etp_rt cookie exists, anonymously otherwise
            if self._get_cookies(f'https://www.crunchyroll.com/{lang}').get('etp_rt'):
                grant_type, key = 'etp_rt_cookie', 'accountAuthClientId'
            else:
                grant_type, key = 'client_id', 'anonClientId'

            initial_state, app_config = self._get_embedded_json(self._download_webpage(
                f'https://www.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
            api_domain = app_config['cxApiParams']['apiDomain'].replace('beta.crunchyroll.com', 'www.crunchyroll.com')

            # HTTP Basic credentials: the client id as user name with an empty password
            auth_response = self._download_json(
                f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
                headers={
                    'Authorization': 'Basic ' + str(base64.b64encode(('%s:' % app_config['cxApiParams'][key]).encode('ascii')), 'ascii')
                }, data=f'grant_type={grant_type}'.encode('ascii'))
            policy_response = self._download_json(
                f'{api_domain}/index/v2', None, note='Retrieving signed policy',
                headers={
                    'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
                })
            cms = policy_response.get('cms_web')
            if not cms:
                # Fail with a clear message instead of a TypeError from subscripting None
                raise ExtractorError('Signed policy response did not contain "cms_web" data')
            bucket = cms['bucket']
            params = {
                'Policy': cms['policy'],
                'Signature': cms['signature'],
                'Key-Pair-Id': cms['key_pair_id']
            }
            locale = traverse_obj(initial_state, ('localization', 'locale'))
            if locale:
                params['locale'] = locale
            CrunchyrollBaseIE.params = (api_domain, bucket, params)
        return CrunchyrollBaseIE.params


class CrunchyrollBetaIE(CrunchyrollBaseIE):
    """Extractor for single episodes (``/watch/<id>`` URLs).

    Extractor arguments read through ``_configuration_arg``:
      * ``hardsub``: hardsub languages to fully extract; ``none`` stands for the
        stream without hardsubs (the default) and ``all`` for every language.
      * ``format``: stream types to extract (``adaptive_hls`` by default).
    """
    IE_NAME = 'crunchyroll'
    _VALID_URL = r'''(?x)
        https?://(?:beta|www)\.crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        watch/(?P<id>\w+)
        (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
        'info_dict': {
            'id': 'GY2P1Q98Y',
            'ext': 'mp4',
            'duration': 1380.241,
            'timestamp': 1459632600,
            'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
            'title': 'World Trigger Episode 73 – To the Future',
            'upload_date': '20160402',
            'series': 'World Trigger',
            'series_id': 'GR757DMKY',
            'season': 'World Trigger',
            'season_id': 'GR9P39NJ6',
            'season_number': 1,
            'episode': 'To the Future',
            'episode_number': 73,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
        },
        'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
    }, {
        'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
        'info_dict': {
            'id': 'GYE5WKQGR',
            'ext': 'mp4',
            'duration': 366.459,
            'timestamp': 1476788400,
            'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
            'title': 'SHELTER Episode  – Porter Robinson presents Shelter the Animation',
            'upload_date': '20161018',
            'series': 'SHELTER',
            'series_id': 'GYGG09WWY',
            'season': 'SHELTER',
            'season_id': 'GR09MGK4R',
            'season_number': 1,
            'episode': 'Porter Robinson presents Shelter the Animation',
            'episode_number': 0,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
        },
        'params': {'skip_download': True},
        'skip': 'Video is Premium only',
    }, {
        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
        'only_matching': True,
    }, {
        'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch episode metadata and stream info for *url* and build the info dict.

        Raises ExtractorError (expected) when the episode is premium-only and
        the metadata carries no ``playback`` entry.
        """
        lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
        api_domain, bucket, params = self._get_params(lang)

        episode_response = self._download_json(
            f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
            note='Retrieving episode metadata', query=params)
        if episode_response.get('is_premium_only') and not episode_response.get('playback'):
            raise ExtractorError('This video is for premium members only.', expected=True)

        stream_response = self._download_json(
            f'{api_domain}{episode_response["__links__"]["streams"]["href"]}', display_id,
            note='Retrieving stream info', query=params)
        # Missing or empty sections are treated as empty mappings
        get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items()

        # 'none' is spelled as the empty string internally, matching streams without a hardsub locale
        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
        # Reversed so that hardsub languages requested earlier receive the higher quality value
        hardsub_preference = qualities(requested_hardsubs[::-1])
        requested_formats = self._configuration_arg('format') or ['adaptive_hls']

        # Keyed by (stream_type, hardsub_lang): keying by hardsub language alone would let a
        # later requested stream type silently replace an earlier one with the same language
        available_formats = {}
        available_hardsubs = set()
        for stream_type, streams in get_streams('streams'):
            if stream_type not in requested_formats:
                continue
            for stream in streams.values():
                if not stream.get('url'):
                    continue
                hardsub_lang = stream.get('hardsub_locale') or ''
                format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
                available_formats[stream_type, hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])
                available_hardsubs.add(hardsub_lang)

        if '' in available_hardsubs and 'all' not in requested_hardsubs:
            # Only the requested hardsub languages get their HLS manifests downloaded
            full_format_langs = set(requested_hardsubs)
            self.to_screen(
                'To get all formats of a hardsub language, use '
                '"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
                'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta for more info',
                only_once=True)
        else:
            full_format_langs = set(map(str.lower, available_hardsubs))

        formats = []
        for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
            if stream_type.endswith('hls'):
                if hardsub_lang.lower() in full_format_langs:
                    adaptive_formats = self._extract_m3u8_formats(
                        stream_url, display_id, 'mp4', m3u8_id=format_id,
                        fatal=False, note=f'Downloading {format_id} HLS manifest')
                else:
                    # Languages that were not requested get a single manifest-level format entry
                    adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
            elif stream_type.endswith('dash'):
                adaptive_formats = self._extract_mpd_formats(
                    stream_url, display_id, mpd_id=format_id,
                    fatal=False, note=f'Downloading {format_id} MPD manifest')
            else:
                self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
                continue
            for f in adaptive_formats:
                # Formats marked as having no audio codec do not get a language
                if f.get('acodec') != 'none':
                    f['language'] = stream_response.get('audio_locale')
                f['quality'] = hardsub_preference(hardsub_lang.lower())
            formats.extend(adaptive_formats)

        return {
            'id': internal_id,
            'title': '%s Episode %s – %s' % (
                episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
            'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
            'duration': float_or_none(episode_response.get('duration_ms'), 1000),
            'timestamp': parse_iso8601(episode_response.get('upload_date')),
            'series': episode_response.get('series_title'),
            'series_id': episode_response.get('series_id'),
            'season': episode_response.get('season_title'),
            'season_id': episode_response.get('season_id'),
            'season_number': episode_response.get('season_number'),
            'episode': episode_response.get('title'),
            'episode_number': episode_response.get('sequence_number'),
            'formats': formats,
            'thumbnails': [{
                'url': thumb.get('source'),
                'width': thumb.get('width'),
                'height': thumb.get('height'),
            } for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
            'subtitles': {
                lang: [{
                    'url': subtitle_data.get('url'),
                    'ext': subtitle_data.get('format')
                }] for lang, subtitle_data in get_streams('subtitles')
            },
        }


class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
    """Playlist extractor for ``/series/<id>`` URLs: one entry per episode of each season."""
    IE_NAME = 'crunchyroll:playlist'
    _VALID_URL = r'''(?x)
        https?://(?:beta|www)\.crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        series/(?P<id>\w+)
        (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
        'info_dict': {
            'id': 'GY19NQ2QR',
            'title': 'Girl Friend BETA',
        },
        'playlist_mincount': 10,
    }, {
        'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result whose entries point at the episode extractor.

        Series metadata and the season list are downloaded up front; the
        per-season episode lists are only downloaded as the entries generator
        is consumed.
        """
        match = self._match_valid_url(url)
        lang = match.group('lang')
        internal_id = match.group('id')
        display_id = match.group('display_id')
        api_domain, bucket, params = self._get_params(lang)
        cms_root = f'{api_domain}/cms/v2{bucket}'

        def fetch_cms(resource, note):
            # Every CMS request shares the same root URL, display id and signed query
            return self._download_json(f'{cms_root}/{resource}', display_id, note=note, query=params)

        series_response = fetch_cms(f'series/{internal_id}', 'Retrieving series metadata')
        seasons_response = fetch_cms(f'seasons?series_id={internal_id}', 'Retrieving season list')

        # info-dict key -> key of the episode object that is copied over verbatim
        copied_fields = {
            'series': 'series_title',
            'series_id': 'series_id',
            'season': 'season_title',
            'season_id': 'season_id',
            'season_number': 'season_number',
            'episode': 'title',
            'episode_number': 'sequence_number',
        }

        def entries():
            for season in seasons_response['items']:
                episode_list = fetch_cms(
                    f'episodes?season_id={season["id"]}',
                    f'Retrieving episode list for {season.get("slug_title")}')
                for episode in episode_list['items']:
                    episode_id, slug = episode['id'], episode['slug_title']
                    entry = {
                        '_type': 'url',
                        'url': f'https://www.crunchyroll.com/{lang}watch/{episode_id}/{slug}',
                        'ie_key': CrunchyrollBetaIE.ie_key(),
                        'id': episode_id,
                        'title': '%s Episode %s – %s' % tuple(
                            episode.get(field) for field in ('season_title', 'episode', 'title')),
                        'description': try_get(episode, lambda x: x['description'].replace(r'\r\n', '\n')),
                        'duration': float_or_none(episode.get('duration_ms'), 1000),
                    }
                    for target, source in copied_fields.items():
                        entry[target] = episode.get(source)
                    yield entry

        return self.playlist_result(entries(), internal_id, series_response.get('title'))
Commit	Line	Data
706dfe44	1	import base64
cb1553e9	2	import urllib.parse
ac668111	3
46279958	4	from .common import InfoExtractor
1cc79574 PH	5	from ..utils import (
1cc79574 PH	6	ExtractorError,
54a5be4d	7	float_or_none,
706dfe44	8	format_field,
706dfe44	9	join_nonempty,
b99ba3df	10	parse_iso8601,
a9d4da60	11	qualities,
706dfe44	12	traverse_obj,
245d43ca	13	try_get,
c8434e83	14	)
c8434e83	15
34440095	16
46279958	17	class CrunchyrollBaseIE(InfoExtractor):
7c74a015 JH	18	_LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
7c74a015 JH	19	_API_BASE = 'https://api.crunchyroll.com'
80f48920	20	_NETRC_MACHINE = 'crunchyroll'
cb1553e9	21	params = None
05dee6c5	22
52efa4b3	23	def _perform_login(self, username, password):
7c74a015	24	if self._get_cookies(self._LOGIN_URL).get('etp_rt'):
eb5b1fc0 S	25	return
eb5b1fc0 S	26
7c74a015 JH	27	upsell_response = self._download_json(
	28	f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
	29	query={
	30	'sess_id': 1,
	31	'device_id': 'whatvalueshouldbeforweb',
	32	'device_type': 'com.crunchyroll.static',
	33	'access_token': 'giKq5eY27ny3cqz',
	34	'referer': self._LOGIN_URL
	35	})
	36	if upsell_response['code'] != 'ok':
	37	raise ExtractorError('Could not get session id')
	38	session_id = upsell_response['data']['session_id']
	39
	40	login_response = self._download_json(
	41	f'{self._API_BASE}/login.1.json', None, 'Logging in',
cb1553e9	42	data=urllib.parse.urlencode({
7c74a015 JH	43	'account': username,
	44	'password': password,
	45	'session_id': session_id
	46	}).encode('ascii'))
	47	if login_response['code'] != 'ok':
97bef011	48	raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
7c74a015 JH	49	if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
7c74a015 JH	50	raise ExtractorError('Login succeeded but did not set etp_rt cookie')
80f48920	51
cb1553e9	52	def _get_embedded_json(self, webpage, display_id):
f4d706a9 JH	53	initial_state = self._parse_json(self._search_regex(
	54	r'__INITIAL_STATE__\s=\s({.+?})\s*;', webpage, 'initial state'), display_id)
	55	app_config = self._parse_json(self._search_regex(
	56	r'__APP_CONFIG__\s=\s({.+?})\s*;', webpage, 'app config'), display_id)
	57	return initial_state, app_config
	58
f4d706a9	59	def _get_params(self, lang):
cb1553e9 JH	60	if not CrunchyrollBaseIE.params:
cb1553e9 JH	61	if self._get_cookies(f'https://www.crunchyroll.com/{lang}').get('etp_rt'):
459262ac JH	62	grant_type, key = 'etp_rt_cookie', 'accountAuthClientId'
	63	else:
	64	grant_type, key = 'client_id', 'anonClientId'
	65
cb1553e9 JH	66	initial_state, app_config = self._get_embedded_json(self._download_webpage(
	67	f'https://www.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
	68	api_domain = app_config['cxApiParams']['apiDomain'].replace('beta.crunchyroll.com', 'www.crunchyroll.com')
459262ac	69
f4d706a9	70	auth_response = self._download_json(
459262ac	71	f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
f4d706a9	72	headers={
459262ac JH	73	'Authorization': 'Basic ' + str(base64.b64encode(('%s:' % app_config['cxApiParams'][key]).encode('ascii')), 'ascii')
459262ac JH	74	}, data=f'grant_type={grant_type}'.encode('ascii'))
f4d706a9 JH	75	policy_response = self._download_json(
	76	f'{api_domain}/index/v2', None, note='Retrieving signed policy',
	77	headers={
	78	'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
	79	})
cb1553e9	80	cms = policy_response.get('cms_web')
88d62206	81	bucket = cms['bucket']
f4d706a9	82	params = {
88d62206 JH	83	'Policy': cms['policy'],
	84	'Signature': cms['signature'],
	85	'Key-Pair-Id': cms['key_pair_id']
f4d706a9 JH	86	}
	87	locale = traverse_obj(initial_state, ('localization', 'locale'))
	88	if locale:
	89	params['locale'] = locale
cb1553e9 JH	90	CrunchyrollBaseIE.params = (api_domain, bucket, params)
cb1553e9 JH	91	return CrunchyrollBaseIE.params
f4d706a9	92
f4d706a9	93
cb1553e9 JH	94	class CrunchyrollBetaIE(CrunchyrollBaseIE):
cb1553e9 JH	95	IE_NAME = 'crunchyroll'
5da42f2b	96	_VALID_URL = r'''(?x)
cb1553e9	97	https?://(?:beta\|www)\.crunchyroll\.com/
5da42f2b	98	(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
	99	watch/(?P<id>\w+)
	100	(?:/(?P<display_id>[\w-]+))?/?(?:[?#]\|$)'''
dd078970	101	_TESTS = [{
cb1553e9	102	'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
dd078970	103	'info_dict': {
b99ba3df	104	'id': 'GY2P1Q98Y',
dd078970	105	'ext': 'mp4',
b99ba3df JH	106	'duration': 1380.241,
b99ba3df JH	107	'timestamp': 1459632600,
dd078970	108	'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
dd078970	109	'title': 'World Trigger Episode 73 – To the Future',
dd078970	110	'upload_date': '20160402',
f4d706a9	111	'series': 'World Trigger',
b99ba3df	112	'series_id': 'GR757DMKY',
f4d706a9	113	'season': 'World Trigger',
b99ba3df	114	'season_id': 'GR9P39NJ6',
f4d706a9	115	'season_number': 1,
b99ba3df JH	116	'episode': 'To the Future',
b99ba3df JH	117	'episode_number': 73,
cb1553e9	118	'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
dd078970	119	},
dfea94f8 SS	120	'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
dfea94f8 SS	121	}, {
cb1553e9	122	'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
dfea94f8 SS	123	'info_dict': {
	124	'id': 'GYE5WKQGR',
	125	'ext': 'mp4',
	126	'duration': 366.459,
	127	'timestamp': 1476788400,
	128	'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
	129	'title': 'SHELTER Episode – Porter Robinson presents Shelter the Animation',
	130	'upload_date': '20161018',
	131	'series': 'SHELTER',
	132	'series_id': 'GYGG09WWY',
	133	'season': 'SHELTER',
	134	'season_id': 'GR09MGK4R',
	135	'season_number': 1,
	136	'episode': 'Porter Robinson presents Shelter the Animation',
	137	'episode_number': 0,
cb1553e9	138	'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
dfea94f8 SS	139	},
	140	'params': {'skip_download': True},
	141	'skip': 'Video is Premium only',
f4d706a9	142	}, {
cb1553e9	143	'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
f4d706a9	144	'only_matching': True,
964b5493	145	}, {
	146	'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
	147	'only_matching': True,
dd078970	148	}]
	149
	150	def _real_extract(self, url):
f4d706a9	151	lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
f4d706a9 JH	152	api_domain, bucket, params = self._get_params(lang)
f4d706a9 JH	153
706dfe44 JH	154	episode_response = self._download_json(
706dfe44 JH	155	f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
7d0f6f0c	156	note='Retrieving episode metadata', query=params)
706dfe44 JH	157	if episode_response.get('is_premium_only') and not episode_response.get('playback'):
706dfe44 JH	158	raise ExtractorError('This video is for premium members only.', expected=True)
706dfe44	159
f62f553d JH	160	stream_response = self._download_json(
	161	f'{api_domain}{episode_response["__links__"]["streams"]["href"]}', display_id,
	162	note='Retrieving stream info', query=params)
7d0f6f0c	163	get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items()
706dfe44 JH	164
	165	requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
	166	hardsub_preference = qualities(requested_hardsubs[::-1])
	167	requested_formats = self._configuration_arg('format') or ['adaptive_hls']
	168
dfea94f8	169	available_formats = {}
7d0f6f0c	170	for stream_type, streams in get_streams('streams'):
706dfe44 JH	171	if stream_type not in requested_formats:
	172	continue
	173	for stream in streams.values():
706dfe44 JH	174	if not stream.get('url'):
706dfe44 JH	175	continue
dfea94f8 SS	176	hardsub_lang = stream.get('hardsub_locale') or ''
	177	format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
	178	available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])
	179
	180	if '' in available_formats and 'all' not in requested_hardsubs:
	181	full_format_langs = set(requested_hardsubs)
	182	self.to_screen(
	183	'To get all formats of a hardsub language, use '
	184	'"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
	185	'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta for more info',
	186	only_once=True)
	187	else:
	188	full_format_langs = set(map(str.lower, available_formats))
	189
	190	formats = []
	191	for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
	192	if stream_type.endswith('hls'):
	193	if hardsub_lang.lower() in full_format_langs:
706dfe44	194	adaptive_formats = self._extract_m3u8_formats(
dfea94f8	195	stream_url, display_id, 'mp4', m3u8_id=format_id,
7d0f6f0c	196	fatal=False, note=f'Downloading {format_id} HLS manifest')
dfea94f8 SS	197	else:
	198	adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
	199	elif stream_type.endswith('dash'):
	200	adaptive_formats = self._extract_mpd_formats(
	201	stream_url, display_id, mpd_id=format_id,
	202	fatal=False, note=f'Downloading {format_id} MPD manifest')
	203	else:
	204	self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
	205	continue
	206	for f in adaptive_formats:
	207	if f.get('acodec') != 'none':
	208	f['language'] = stream_response.get('audio_locale')
	209	f['quality'] = hardsub_preference(hardsub_lang.lower())
	210	formats.extend(adaptive_formats)
706dfe44 JH	211
	212	return {
	213	'id': internal_id,
7d0f6f0c B	214	'title': '%s Episode %s – %s' % (
	215	episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
	216	'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
706dfe44	217	'duration': float_or_none(episode_response.get('duration_ms'), 1000),
b99ba3df	218	'timestamp': parse_iso8601(episode_response.get('upload_date')),
706dfe44 JH	219	'series': episode_response.get('series_title'),
	220	'series_id': episode_response.get('series_id'),
	221	'season': episode_response.get('season_title'),
	222	'season_id': episode_response.get('season_id'),
	223	'season_number': episode_response.get('season_number'),
	224	'episode': episode_response.get('title'),
	225	'episode_number': episode_response.get('sequence_number'),
7d0f6f0c B	226	'formats': formats,
	227	'thumbnails': [{
	228	'url': thumb.get('source'),
	229	'width': thumb.get('width'),
	230	'height': thumb.get('height'),
	231	} for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
	232	'subtitles': {
	233	lang: [{
	234	'url': subtitle_data.get('url'),
	235	'ext': subtitle_data.get('format')
	236	}] for lang, subtitle_data in get_streams('subtitles')
	237	},
706dfe44	238	}
dd078970	239
dd078970	240
cb1553e9 JH	241	class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
cb1553e9 JH	242	IE_NAME = 'crunchyroll:playlist'
5da42f2b	243	_VALID_URL = r'''(?x)
cb1553e9	244	https?://(?:beta\|www)\.crunchyroll\.com/
5da42f2b	245	(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
	246	series/(?P<id>\w+)
	247	(?:/(?P<display_id>[\w-]+))?/?(?:[?#]\|$)'''
dd078970	248	_TESTS = [{
cb1553e9	249	'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
dd078970	250	'info_dict': {
b99ba3df	251	'id': 'GY19NQ2QR',
dd078970	252	'title': 'Girl Friend BETA',
	253	},
	254	'playlist_mincount': 10,
	255	}, {
5da42f2b	256	'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
dd078970	257	'only_matching': True,
	258	}]
	259
	260	def _real_extract(self, url):
f4d706a9	261	lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
f4d706a9 JH	262	api_domain, bucket, params = self._get_params(lang)
	263
	264	series_response = self._download_json(
	265	f'{api_domain}/cms/v2{bucket}/series/{internal_id}', display_id,
	266	note='Retrieving series metadata', query=params)
	267
	268	seasons_response = self._download_json(
	269	f'{api_domain}/cms/v2{bucket}/seasons?series_id={internal_id}', display_id,
	270	note='Retrieving season list', query=params)
	271
	272	def entries():
	273	for season in seasons_response['items']:
	274	episodes_response = self._download_json(
	275	f'{api_domain}/cms/v2{bucket}/episodes?season_id={season["id"]}', display_id,
	276	note=f'Retrieving episode list for {season.get("slug_title")}', query=params)
	277	for episode in episodes_response['items']:
	278	episode_id = episode['id']
	279	episode_display_id = episode['slug_title']
	280	yield {
	281	'_type': 'url',
cb1553e9	282	'url': f'https://www.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}',
f4d706a9 JH	283	'ie_key': CrunchyrollBetaIE.ie_key(),
	284	'id': episode_id,
	285	'title': '%s Episode %s – %s' % (episode.get('season_title'), episode.get('episode'), episode.get('title')),
	286	'description': try_get(episode, lambda x: x['description'].replace(r'\r\n', '\n')),
	287	'duration': float_or_none(episode.get('duration_ms'), 1000),
	288	'series': episode.get('series_title'),
	289	'series_id': episode.get('series_id'),
	290	'season': episode.get('season_title'),
	291	'season_id': episode.get('season_id'),
	292	'season_number': episode.get('season_number'),
	293	'episode': episode.get('title'),
	294	'episode_number': episode.get('sequence_number')
	295	}
	296
	297	return self.playlist_result(entries(), internal_id, series_response.get('title'))