4 from .common
import InfoExtractor
class CrunchyrollBaseIE(InfoExtractor):
    """Shared login and API-bootstrap logic for the Crunchyroll extractors."""

    _LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
    # Class-level cache of the (api_domain, bucket, query_params) tuple built by
    # _get_params(). It is read and written through CrunchyrollBaseIE itself, so
    # all subclasses share one value.
    params = None

    def _perform_login(self, username, password):
        """Log in with *username* / *password* unless an ``etp_rt`` cookie already exists.

        Raises ExtractorError when no session id can be obtained, when the login
        endpoint answers with a non-``ok`` code, or when the login reports
        success but the ``etp_rt`` cookie is still missing afterwards.
        """
        if self._get_cookies(self._LOGIN_URL).get('etp_rt'):
            # Already authenticated through existing cookies; nothing to do.
            return

        # NOTE(review): the opening of this request dict was elided in the
        # reviewed extract; confirm the `query=` keyword and the 'sess_id'
        # entry against the file.
        upsell_response = self._download_json(
            f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
            query={
                'sess_id': 1,
                'device_id': 'whatvalueshouldbeforweb',
                'device_type': 'com.crunchyroll.static',
                'access_token': 'giKq5eY27ny3cqz',
                'referer': self._LOGIN_URL,
            })
        if upsell_response['code'] != 'ok':
            raise ExtractorError('Could not get session id')
        session_id = upsell_response['data']['session_id']

        # NOTE(review): the credential keys were elided in the reviewed extract;
        # confirm 'account' / 'password' against the file.
        login_response = self._download_json(
            f'{self._API_BASE}/login.1.json', None, 'Logging in',
            data=urllib.parse.urlencode({
                'account': username,
                'password': password,
                'session_id': session_id,
            }).encode('ascii'))
        if login_response['code'] != 'ok':
            raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
        if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
            raise ExtractorError('Login succeeded but did not set etp_rt cookie')

    def _get_embedded_json(self, webpage, display_id):
        """Return ``(initial_state, app_config)`` parsed from *webpage*.

        Both values are the JSON objects assigned to ``__INITIAL_STATE__`` and
        ``__APP_CONFIG__`` in the page source.
        """
        initial_state = self._parse_json(self._search_regex(
            r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id)
        app_config = self._parse_json(self._search_regex(
            r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id)
        return initial_state, app_config

    def _get_params(self, lang):
        """Return the cached ``(api_domain, bucket, query_params)`` tuple.

        On first use this downloads the main page for *lang*, obtains an access
        token, fetches the signed CMS policy and stores the result on
        CrunchyrollBaseIE so later calls skip the network round-trips.

        Raises ExtractorError when the index response carries no ``cms_web``
        entry.
        """
        if CrunchyrollBaseIE.params:
            return CrunchyrollBaseIE.params

        main_page_url = f'https://www.crunchyroll.com/{lang}'
        # The grant type depends on whether an account cookie is present.
        if self._get_cookies(main_page_url).get('etp_rt'):
            grant_type, key = 'etp_rt_cookie', 'accountAuthClientId'
        else:
            grant_type, key = 'client_id', 'anonClientId'

        initial_state, app_config = self._get_embedded_json(self._download_webpage(
            main_page_url, None, note='Retrieving main page'), None)
        api_domain = app_config['cxApiParams']['apiDomain'].replace('beta.crunchyroll.com', 'www.crunchyroll.com')

        # HTTP Basic credentials: "<client id>:" with an empty password.
        basic_token = base64.b64encode(
            ('%s:' % app_config['cxApiParams'][key]).encode('ascii')).decode('ascii')
        auth_response = self._download_json(
            f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
            headers={
                'Authorization': 'Basic ' + basic_token,
            }, data=f'grant_type={grant_type}'.encode('ascii'))
        policy_response = self._download_json(
            f'{api_domain}/index/v2', None, note='Retrieving signed policy',
            headers={
                'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token'],
            })

        cms = policy_response.get('cms_web')
        if not cms:
            # Fail with a readable message instead of a TypeError on None below.
            raise ExtractorError('Unable to retrieve the signed CMS policy (no "cms_web" in response)')
        bucket = cms['bucket']
        params = {
            'Policy': cms['policy'],
            'Signature': cms['signature'],
            'Key-Pair-Id': cms['key_pair_id'],
        }
        locale = traverse_obj(initial_state, ('localization', 'locale'))
        if locale:
            params['locale'] = locale

        CrunchyrollBaseIE.params = (api_domain, bucket, params)
        return CrunchyrollBaseIE.params
class CrunchyrollBetaIE(CrunchyrollBaseIE):
    """Extractor for single Crunchyroll ``/watch/<id>`` episode pages."""

    IE_NAME = 'crunchyroll'
    _VALID_URL = r'''(?x)
        https?://(?:beta|www)\.crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        watch/(?P<id>\w+)
        (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
    # NOTE(review): only the expectation keys visible in the reviewed extract
    # are listed below; restore any elided keys (e.g. id/ext) from the file.
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
        'info_dict': {
            'duration': 1380.241,
            'timestamp': 1459632600,
            'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
            'title': 'World Trigger Episode 73 – To the Future',
            'upload_date': '20160402',
            'series': 'World Trigger',
            'series_id': 'GR757DMKY',
            'season': 'World Trigger',
            'season_id': 'GR9P39NJ6',
            'episode': 'To the Future',
            'episode_number': 73,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
        },
        'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
    }, {
        'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
        'info_dict': {
            'timestamp': 1476788400,
            'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
            'title': 'SHELTER Episode – Porter Robinson presents Shelter the Animation',
            'upload_date': '20161018',
            'series_id': 'GYGG09WWY',
            'season_id': 'GR09MGK4R',
            'episode': 'Porter Robinson presents Shelter the Animation',
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
        },
        'params': {'skip_download': True},
        'skip': 'Video is Premium only',
    }, {
        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
        'only_matching': True,
    }, {
        'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict (metadata, formats, thumbnails, subtitles) for one episode.

        Stream types are filtered by the ``format`` extractor argument (default
        ``adaptive_hls``); the ``hardsub`` extractor argument selects which
        hardsub languages get their HLS manifests fully expanded.

        Raises ExtractorError (expected) when the episode is premium-only and
        the metadata carries no ``playback`` entry.
        """
        lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
        api_domain, bucket, params = self._get_params(lang)

        episode_response = self._download_json(
            f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
            note='Retrieving episode metadata', query=params)
        if episode_response.get('is_premium_only') and not episode_response.get('playback'):
            raise ExtractorError('This video is for premium members only.', expected=True)

        stream_response = self._download_json(
            f'{api_domain}{episode_response["__links__"]["streams"]["href"]}', display_id,
            note='Retrieving stream info', query=params)

        def get_streams(name):
            # Missing or null sections are treated as empty mappings.
            return (traverse_obj(stream_response, name) or {}).items()

        # 'none' on the command line means "no hardsub", stored as ''.
        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
        # Reversed so that the first requested language ranks highest.
        hardsub_preference = qualities(requested_hardsubs[::-1])
        requested_formats = self._configuration_arg('format') or ['adaptive_hls']

        # Keyed by format_id (stream type + hardsub locale). Keying by hardsub
        # locale alone let a later stream type overwrite an earlier one when
        # several stream types were requested.
        available_formats = {}
        for stream_type, streams in get_streams('streams'):
            if stream_type not in requested_formats:
                continue
            for stream in streams.values():
                if not stream.get('url'):
                    continue
                hardsub_lang = stream.get('hardsub_locale') or ''
                format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
                available_formats[format_id] = (stream_type, format_id, hardsub_lang, stream['url'])

        available_hardsubs = {hardsub_lang for _, _, hardsub_lang, _ in available_formats.values()}
        if '' in available_hardsubs and 'all' not in requested_hardsubs:
            # Only the requested hardsub languages get full manifests; tell the
            # user how to obtain the rest.
            full_format_langs = set(requested_hardsubs)
            self.to_screen(
                'To get all formats of a hardsub language, use '
                '"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
                'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta for more info',
                only_once=True)
        else:
            full_format_langs = set(map(str.lower, available_hardsubs))

        formats = []
        for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
            if stream_type.endswith('hls'):
                if hardsub_lang.lower() in full_format_langs:
                    adaptive_formats = self._extract_m3u8_formats(
                        stream_url, display_id, 'mp4', m3u8_id=format_id,
                        fatal=False, note=f'Downloading {format_id} HLS manifest')
                else:
                    # Single placeholder format; the manifest is not downloaded.
                    adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
            elif stream_type.endswith('dash'):
                adaptive_formats = self._extract_mpd_formats(
                    stream_url, display_id, mpd_id=format_id,
                    fatal=False, note=f'Downloading {format_id} MPD manifest')
            else:
                self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
                continue
            for f in adaptive_formats:
                if f.get('acodec') != 'none':
                    f['language'] = stream_response.get('audio_locale')
                f['quality'] = hardsub_preference(hardsub_lang.lower())
            formats.extend(adaptive_formats)
        self._sort_formats(formats)

        return {
            'id': internal_id,
            'title': '%s Episode %s – %s' % (
                episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
            'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
            'duration': float_or_none(episode_response.get('duration_ms'), 1000),
            'timestamp': parse_iso8601(episode_response.get('upload_date')),
            'series': episode_response.get('series_title'),
            'series_id': episode_response.get('series_id'),
            'season': episode_response.get('season_title'),
            'season_id': episode_response.get('season_id'),
            'season_number': episode_response.get('season_number'),
            'episode': episode_response.get('title'),
            'episode_number': episode_response.get('sequence_number'),
            'formats': formats,
            'thumbnails': [{
                'url': thumb.get('source'),
                'width': thumb.get('width'),
                'height': thumb.get('height'),
            } for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
            'subtitles': {
                sub_lang: [{
                    'url': subtitle_data.get('url'),
                    'ext': subtitle_data.get('format'),
                }] for sub_lang, subtitle_data in get_streams('subtitles')
            },
        }
class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
    """Extractor for Crunchyroll ``/series/<id>`` pages, returned as a playlist."""

    IE_NAME = 'crunchyroll:playlist'
    _VALID_URL = r'''(?x)
        https?://(?:beta|www)\.crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        series/(?P<id>\w+)
        (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
    # NOTE(review): the 'id' expectation was elided in the reviewed extract and
    # is taken from the test URL; confirm against the file.
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
        'info_dict': {
            'id': 'GY19NQ2QR',
            'title': 'Girl Friend BETA',
        },
        'playlist_mincount': 10,
    }, {
        'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist with one URL entry per episode of the series.

        Series metadata and the season list are fetched up front; each season's
        episode list is fetched as the entry generator is consumed.
        """
        lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
        api_domain, bucket, params = self._get_params(lang)
        cms_root = f'{api_domain}/cms/v2{bucket}'

        series_response = self._download_json(
            f'{cms_root}/series/{internal_id}', display_id,
            note='Retrieving series metadata', query=params)
        seasons_response = self._download_json(
            f'{cms_root}/seasons?series_id={internal_id}', display_id,
            note='Retrieving season list', query=params)

        def iter_episode_entries():
            for season in seasons_response['items']:
                season_episodes = self._download_json(
                    f'{cms_root}/episodes?season_id={season["id"]}', display_id,
                    note=f'Retrieving episode list for {season.get("slug_title")}', query=params)
                for episode in season_episodes['items']:
                    episode_id = episode['id']
                    episode_display_id = episode['slug_title']
                    season_title = episode.get('season_title')
                    episode_title = episode.get('title')
                    # Each entry is delegated to the single-episode extractor.
                    yield {
                        '_type': 'url',
                        'url': f'https://www.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}',
                        'ie_key': CrunchyrollBetaIE.ie_key(),
                        'id': episode_id,
                        'title': '%s Episode %s – %s' % (season_title, episode.get('episode'), episode_title),
                        'description': try_get(episode, lambda x: x['description'].replace(r'\r\n', '\n')),
                        'duration': float_or_none(episode.get('duration_ms'), 1000),
                        'series': episode.get('series_title'),
                        'series_id': episode.get('series_id'),
                        'season': season_title,
                        'season_id': episode.get('season_id'),
                        'season_number': episode.get('season_number'),
                        'episode': episode_title,
                        'episode_number': episode.get('sequence_number'),
                    }

        playlist_title = series_response.get('title')
        return self.playlist_result(iter_episode_entries(), internal_id, playlist_title)