# coding: utf-8
from __future__ import unicode_literals
+import base64
import re
import json
import zlib
bytes_to_intlist,
extract_attributes,
float_or_none,
+ format_field,
intlist_to_bytes,
int_or_none,
+ join_nonempty,
lowercase_escape,
merge_dicts,
qualities,
remove_end,
sanitized_Request,
+ traverse_obj,
try_get,
urlencode_postdata,
xpath_text,
def _real_extract(self, url):
    """Extract a single episode from a beta-site episode URL.

    Without an ``etp_rt`` cookie for the URL, the request is handed over to
    ``CrunchyrollIE`` through ``url_result``. With the cookie, the method
    authenticates against the API domain named in the page's
    ``__APP_CONFIG__``, fetches a signed policy, then the episode metadata
    and stream info, and returns a complete info dict.

    Raises ``ExtractorError`` (expected) when the episode is flagged
    premium-only and the metadata carries no playback URL.
    """
    lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id')
    webpage = self._download_webpage(url, display_id)
    initial_state = self._parse_json(
        self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'),
        display_id)
    episode_data = initial_state['content']['byId'][internal_id]
    if not self._get_cookies(url).get('etp_rt'):
        # No session cookie: delegate to the other extractor.
        video_id = episode_data['external_id'].split('.')[1]
        series_id = episode_data['episode_metadata']['series_slug_title']
        return self.url_result(
            f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
            CrunchyrollIE.ie_key(), video_id)

    app_config = self._parse_json(
        self._search_regex(r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'),
        display_id)
    api_params = app_config['cxApiParams']
    client_id = api_params['accountAuthClientId']
    api_domain = api_params['apiDomain']

    # HTTP Basic credentials: the client id with an empty secret.
    basic_token = base64.b64encode(('%s:' % client_id).encode('ascii')).decode('ascii')
    auth_response = self._download_json(
        f'{api_domain}/auth/v1/token', display_id,
        note='Authenticating with cookie',
        headers={
            'Authorization': 'Basic ' + basic_token
        }, data=b'grant_type=etp_rt_cookie')
    policy_response = self._download_json(
        f'{api_domain}/index/v2', display_id,
        note='Retrieving signed policy',
        headers={
            'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
        })
    cms = policy_response['cms']
    bucket = cms['bucket']
    params = {
        'Policy': cms['policy'],
        'Signature': cms['signature'],
        'Key-Pair-Id': cms['key_pair_id']
    }
    locale = traverse_obj(initial_state, ('localization', 'locale'))
    if locale:
        params['locale'] = locale
    episode_response = self._download_json(
        f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
        note='Retrieving episode metadata',
        query=params)
    if episode_response.get('is_premium_only') and not episode_response.get('playback'):
        raise ExtractorError('This video is for premium members only.', expected=True)
    stream_response = self._download_json(
        episode_response['playback'], display_id,
        note='Retrieving stream info')

    # `images.thumbnail` is iterated as a list of lists; it may be absent,
    # in which case traverse_obj returns None.
    thumbnails = []
    for thumbnails_data in traverse_obj(episode_response, ('images', 'thumbnail')) or []:
        for thumbnail_data in thumbnails_data or []:
            source = thumbnail_data.get('source')
            if not source:
                # A thumbnail entry without a URL is useless downstream.
                continue
            thumbnails.append({
                'url': source,
                'width': thumbnail_data.get('width'),
                'height': thumbnail_data.get('height'),
            })

    # The subtitles mapping may be missing or null.
    subtitles = {}
    for sub_lang, subtitle_data in (stream_response.get('subtitles') or {}).items():
        subtitles[sub_lang] = [{
            'url': subtitle_data.get('url'),
            'ext': subtitle_data.get('format')
        }]

    # 'none' on the command line selects streams without hardsubs, which are
    # represented internally by the empty string.
    requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
    # Earlier-requested hardsub languages get the higher quality rank.
    hardsub_preference = qualities(requested_hardsubs[::-1])
    requested_formats = self._configuration_arg('format') or ['adaptive_hls']

    formats = []
    for stream_type, streams in (stream_response.get('streams') or {}).items():
        if stream_type not in requested_formats:
            continue
        protocol = stream_type.split('_')[-1]
        for stream in streams.values():
            hardsub_lang = (stream.get('hardsub_locale') or '').lower()
            if hardsub_lang not in requested_hardsubs:
                continue
            format_id = join_nonempty(
                stream_type,
                format_field(stream, 'hardsub_locale', 'hardsub-%s'))
            if not stream.get('url'):
                continue
            if protocol == 'hls':
                adaptive_formats = self._extract_m3u8_formats(
                    stream['url'], display_id, 'mp4', m3u8_id=format_id,
                    note='Downloading %s information' % format_id,
                    fatal=False)
            elif protocol == 'dash':
                adaptive_formats = self._extract_mpd_formats(
                    stream['url'], display_id, mpd_id=format_id,
                    note='Downloading %s information' % format_id,
                    fatal=False)
            else:
                # Unknown manifest type: skip it instead of reusing the
                # previous iteration's formats (or hitting a NameError).
                continue
            for f in adaptive_formats:
                if f.get('acodec') != 'none':
                    f['language'] = stream_response.get('audio_locale')
                f['quality'] = hardsub_preference(hardsub_lang)
            formats.extend(adaptive_formats)
    self._sort_formats(formats)

    # The description may be missing; only normalise it when present.
    description = episode_response.get('description')
    if description:
        description = description.replace(r'\r\n', '\n')

    return {
        'id': internal_id,
        'title': '%s Episode %s – %s' % (episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
        'description': description,
        'duration': float_or_none(episode_response.get('duration_ms'), 1000),
        'thumbnails': thumbnails,
        'series': episode_response.get('series_title'),
        'series_id': episode_response.get('series_id'),
        'season': episode_response.get('season_title'),
        'season_id': episode_response.get('season_id'),
        'season_number': episode_response.get('season_number'),
        'episode': episode_response.get('title'),
        'episode_number': episode_response.get('sequence_number'),
        'subtitles': subtitles,
        'formats': formats
    }
class CrunchyrollBetaShowIE(CrunchyrollBaseIE):