]>
Commit | Line | Data |
---|---|---|
706dfe44 | 1 | import base64 |
cb1553e9 | 2 | import urllib.parse |
ac668111 | 3 | |
46279958 | 4 | from .common import InfoExtractor |
1cc79574 PH |
5 | from ..utils import ( |
6 | ExtractorError, | |
54a5be4d | 7 | float_or_none, |
706dfe44 | 8 | format_field, |
706dfe44 | 9 | join_nonempty, |
b99ba3df | 10 | parse_iso8601, |
a9d4da60 | 11 | qualities, |
706dfe44 | 12 | traverse_obj, |
245d43ca | 13 | try_get, |
c8434e83 | 14 | ) |
c8434e83 | 15 | |
34440095 | 16 | |
class CrunchyrollBaseIE(InfoExtractor):
    """Shared login and API-bootstrap logic for the Crunchyroll extractors."""

    _LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
    # Cache for the (api_domain, bucket, params) tuple built by _get_params().
    # It is always read and written through CrunchyrollBaseIE itself, so every
    # subclass and instance shares the same value.
    params = None

    def _perform_login(self, username, password):
        """Log in with `username`/`password` unless an `etp_rt` cookie already exists.

        Raises ExtractorError when no session id can be obtained, when the
        login endpoint does not answer with code 'ok', or when the login
        reports success but the `etp_rt` cookie is still missing afterwards.
        """
        # An existing etp_rt cookie is treated as "already logged in"
        if self._get_cookies(self._LOGIN_URL).get('etp_rt'):
            return

        # The login endpoint needs a session id, which is requested first
        upsell_response = self._download_json(
            f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
            query={
                'sess_id': 1,
                'device_id': 'whatvalueshouldbeforweb',
                'device_type': 'com.crunchyroll.static',
                'access_token': 'giKq5eY27ny3cqz',
                'referer': self._LOGIN_URL
            })
        if upsell_response['code'] != 'ok':
            raise ExtractorError('Could not get session id')
        session_id = upsell_response['data']['session_id']

        login_response = self._download_json(
            f'{self._API_BASE}/login.1.json', None, 'Logging in',
            data=urllib.parse.urlencode({
                'account': username,
                'password': password,
                'session_id': session_id
            }).encode('ascii'))
        if login_response['code'] != 'ok':
            raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
        if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
            raise ExtractorError('Login succeeded but did not set etp_rt cookie')

    def _get_embedded_json(self, webpage, display_id):
        """Return `(initial_state, app_config)` parsed from `webpage`.

        Both values are the JSON objects assigned to `__INITIAL_STATE__` and
        `__APP_CONFIG__` in the page source.
        """
        initial_state = self._parse_json(self._search_regex(
            r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id)
        app_config = self._parse_json(self._search_regex(
            r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id)
        return initial_state, app_config

    def _get_params(self, lang):
        """Return the cached `(api_domain, bucket, params)` tuple, building it on first use.

        `lang` is the language path prefix of the URL (may be empty); it is
        only used while the cache is still empty. `params` is a dict with the
        'Policy', 'Signature' and 'Key-Pair-Id' values of the signed policy,
        plus 'locale' when the main page exposes one.

        Raises ExtractorError if the policy response has no 'cms_web' entry.
        """
        if not CrunchyrollBaseIE.params:
            # Authenticate as the logged-in account when the etp_rt cookie is
            # present, otherwise fall back to the anonymous client id
            if self._get_cookies(f'https://www.crunchyroll.com/{lang}').get('etp_rt'):
                grant_type, key = 'etp_rt_cookie', 'accountAuthClientId'
            else:
                grant_type, key = 'client_id', 'anonClientId'

            initial_state, app_config = self._get_embedded_json(self._download_webpage(
                f'https://www.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
            # The app config may still point at the beta domain; always use www
            api_domain = app_config['cxApiParams']['apiDomain'].replace('beta.crunchyroll.com', 'www.crunchyroll.com')

            auth_response = self._download_json(
                f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
                headers={
                    # HTTP Basic credentials: client id as user name, empty password
                    'Authorization': 'Basic ' + str(base64.b64encode(('%s:' % app_config['cxApiParams'][key]).encode('ascii')), 'ascii')
                }, data=f'grant_type={grant_type}'.encode('ascii'))
            policy_response = self._download_json(
                f'{api_domain}/index/v2', None, note='Retrieving signed policy',
                headers={
                    'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
                })
            cms = policy_response.get('cms_web')
            if not cms:
                # Without this guard a missing entry surfaces as an opaque
                # "'NoneType' object is not subscriptable" TypeError below
                raise ExtractorError('Unable to retrieve the signed CMS policy (no "cms_web" in the response)')
            bucket = cms['bucket']
            params = {
                'Policy': cms['policy'],
                'Signature': cms['signature'],
                'Key-Pair-Id': cms['key_pair_id']
            }
            locale = traverse_obj(initial_state, ('localization', 'locale'))
            if locale:
                params['locale'] = locale
            CrunchyrollBaseIE.params = (api_domain, bucket, params)
        return CrunchyrollBaseIE.params
f4d706a9 | 92 | |
f4d706a9 | 93 | |
cb1553e9 JH |
class CrunchyrollBetaIE(CrunchyrollBaseIE):
    """Extractor for single Crunchyroll episodes (`/watch/<id>` URLs)."""

    IE_NAME = 'crunchyroll'
    _VALID_URL = r'''(?x)
        https?://(?:beta|www)\.crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        watch/(?P<id>\w+)
        (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
        'info_dict': {
            'id': 'GY2P1Q98Y',
            'ext': 'mp4',
            'duration': 1380.241,
            'timestamp': 1459632600,
            'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
            'title': 'World Trigger Episode 73 – To the Future',
            'upload_date': '20160402',
            'series': 'World Trigger',
            'series_id': 'GR757DMKY',
            'season': 'World Trigger',
            'season_id': 'GR9P39NJ6',
            'season_number': 1,
            'episode': 'To the Future',
            'episode_number': 73,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
        },
        'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
    }, {
        'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
        'info_dict': {
            'id': 'GYE5WKQGR',
            'ext': 'mp4',
            'duration': 366.459,
            'timestamp': 1476788400,
            'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
            'title': 'SHELTER Episode – Porter Robinson presents Shelter the Animation',
            'upload_date': '20161018',
            'series': 'SHELTER',
            'series_id': 'GYGG09WWY',
            'season': 'SHELTER',
            'season_id': 'GR09MGK4R',
            'season_number': 1,
            'episode': 'Porter Robinson presents Shelter the Animation',
            'episode_number': 0,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
        },
        'params': {'skip_download': True},
        'skip': 'Video is Premium only',
    }, {
        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
        'only_matching': True,
    }, {
        'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict (metadata, formats, thumbnails, subtitles) for one episode.

        Raises ExtractorError (expected) when the episode is flagged premium-only
        and the metadata carries no 'playback' entry.
        """
        lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
        api_domain, bucket, params = self._get_params(lang)

        episode_response = self._download_json(
            f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
            note='Retrieving episode metadata', query=params)
        if episode_response.get('is_premium_only') and not episode_response.get('playback'):
            raise ExtractorError('This video is for premium members only.', expected=True)

        stream_response = self._download_json(
            f'{api_domain}{episode_response["__links__"]["streams"]["href"]}', display_id,
            note='Retrieving stream info', query=params)
        # Yields (key, value) pairs of the named mapping, or nothing if it is absent
        get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items()

        # 'none' selects the stream without hardsubs, which is stored under ''
        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
        # NOTE(review): reversed presumably so that earlier requested languages
        # rank higher -- confirm against utils.qualities()
        hardsub_preference = qualities(requested_hardsubs[::-1])
        requested_formats = self._configuration_arg('format') or ['adaptive_hls']

        # Keyed by format_id (stream type + hardsub language). Keying by the
        # hardsub language alone made every additional requested stream type
        # overwrite the previous one for the same language.
        available_formats = {}
        for stream_type, streams in get_streams('streams'):
            if stream_type not in requested_formats:
                continue
            for stream in streams.values():
                if not stream.get('url'):
                    continue
                hardsub_lang = stream.get('hardsub_locale') or ''
                format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
                available_formats[format_id] = (stream_type, format_id, hardsub_lang, stream['url'])

        available_hardsubs = {hardsub_lang for _, _, hardsub_lang, _ in available_formats.values()}
        if '' in available_hardsubs and 'all' not in requested_hardsubs:
            # Only the requested hardsub languages get their HLS manifest expanded
            full_format_langs = set(requested_hardsubs)
            self.to_screen(
                'To get all formats of a hardsub language, use '
                '"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
                'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta for more info',
                only_once=True)
        else:
            full_format_langs = set(map(str.lower, available_hardsubs))

        formats = []
        for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
            if stream_type.endswith('hls'):
                if hardsub_lang.lower() in full_format_langs:
                    adaptive_formats = self._extract_m3u8_formats(
                        stream_url, display_id, 'mp4', m3u8_id=format_id,
                        fatal=False, note=f'Downloading {format_id} HLS manifest')
                else:
                    # Other languages only get the single format returned by
                    # _m3u8_meta_format instead of the expanded manifest
                    adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
            elif stream_type.endswith('dash'):
                adaptive_formats = self._extract_mpd_formats(
                    stream_url, display_id, mpd_id=format_id,
                    fatal=False, note=f'Downloading {format_id} MPD manifest')
            else:
                self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
                continue
            for f in adaptive_formats:
                # Formats that carry audio are tagged with the stream's audio locale
                if f.get('acodec') != 'none':
                    f['language'] = stream_response.get('audio_locale')
                f['quality'] = hardsub_preference(hardsub_lang.lower())
            formats.extend(adaptive_formats)

        return {
            'id': internal_id,
            'title': '%s Episode %s – %s' % (
                episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
            # The raw string targets the literal characters \r\n, not a real CRLF
            'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
            'duration': float_or_none(episode_response.get('duration_ms'), 1000),
            'timestamp': parse_iso8601(episode_response.get('upload_date')),
            'series': episode_response.get('series_title'),
            'series_id': episode_response.get('series_id'),
            'season': episode_response.get('season_title'),
            'season_id': episode_response.get('season_id'),
            'season_number': episode_response.get('season_number'),
            'episode': episode_response.get('title'),
            'episode_number': episode_response.get('sequence_number'),
            'formats': formats,
            'thumbnails': [{
                'url': thumb.get('source'),
                'width': thumb.get('width'),
                'height': thumb.get('height'),
            } for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
            'subtitles': {
                lang: [{
                    'url': subtitle_data.get('url'),
                    'ext': subtitle_data.get('format')
                }] for lang, subtitle_data in get_streams('subtitles')
            },
        }
dd078970 | 239 | |
240 | ||
cb1553e9 JH |
class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
    """Playlist extractor for Crunchyroll series pages (`/series/<id>` URLs)."""

    IE_NAME = 'crunchyroll:playlist'
    _VALID_URL = r'''(?x)
        https?://(?:beta|www)\.crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        series/(?P<id>\w+)
        (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
        'info_dict': {
            'id': 'GY19NQ2QR',
            'title': 'Girl Friend BETA',
        },
        'playlist_mincount': 10,
    }, {
        'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist whose entries are the episodes of every season of the series."""
        mobj = self._match_valid_url(url)
        lang, internal_id, display_id = mobj.group('lang', 'id', 'display_id')
        api_domain, bucket, params = self._get_params(lang)

        def call_cms(resource, note):
            # Every CMS request shares the same base URL, video id and signed query
            return self._download_json(
                f'{api_domain}/cms/v2{bucket}/{resource}', display_id,
                note=note, query=params)

        series_info = call_cms(f'series/{internal_id}', 'Retrieving series metadata')
        season_list = call_cms(f'seasons?series_id={internal_id}', 'Retrieving season list')

        def build_entry(episode):
            # 'id' and 'slug_title' are mandatory; all other fields are optional
            episode_id = episode['id']
            slug = episode['slug_title']
            title_parts = (episode.get('season_title'), episode.get('episode'), episode.get('title'))
            return {
                '_type': 'url',
                'url': f'https://www.crunchyroll.com/{lang}watch/{episode_id}/{slug}',
                'ie_key': CrunchyrollBetaIE.ie_key(),
                'id': episode_id,
                'title': '%s Episode %s – %s' % title_parts,
                'description': try_get(episode, lambda x: x['description'].replace(r'\r\n', '\n')),
                'duration': float_or_none(episode.get('duration_ms'), 1000),
                'series': episode.get('series_title'),
                'series_id': episode.get('series_id'),
                'season': episode.get('season_title'),
                'season_id': episode.get('season_id'),
                'season_number': episode.get('season_number'),
                'episode': episode.get('title'),
                'episode_number': episode.get('sequence_number')
            }

        def entries():
            # Lazy: the episode list of a season is only requested once it is reached
            for season in season_list['items']:
                episode_list = call_cms(
                    f'episodes?season_id={season["id"]}',
                    f'Retrieving episode list for {season.get("slug_title")}')
                for episode in episode_list['items']:
                    yield build_entry(episode)

        return self.playlist_result(entries(), internal_id, series_info.get('title'))