X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/f324fe8c590d3f4737cfd8b5a41eaa60edc546dc..61edf57f8f13f6dfd81154174e647eb5fdd26089:/yt_dlp/extractor/viu.py diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index d27091c94..01e59352b 100644 --- a/yt_dlp/extractor/viu.py +++ b/yt_dlp/extractor/viu.py @@ -1,17 +1,19 @@ -import re import json -import uuid import random +import re import urllib.parse +import uuid from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, + remove_end, + smuggle_url, strip_or_none, + traverse_obj, try_get, - smuggle_url, + unified_timestamp, unsmuggle_url, url_or_none, ) @@ -65,7 +67,7 @@ def _real_extract(self, url): 'clip/load', video_id, 'Downloading video data', query={ 'appid': 'viu_desktop', 'fmt': 'json', - 'id': video_id + 'id': video_id, })['item'][0] title = video_data['title'] @@ -79,14 +81,13 @@ def _real_extract(self, url): # hls_file = video_data.get('hlsfile') hls_file = video_data.get('jwhlsfile') if url_path and tdirforwhole and hls_file: - m3u8_url = '%s/%s/%s' % (url_path, tdirforwhole, hls_file) + m3u8_url = f'{url_path}/{tdirforwhole}/{hls_file}' else: # m3u8_url = re.sub( # r'(/hlsc_)[a-z]+(\d+\.m3u8)', # r'\1whe\2', video_data['href']) m3u8_url = video_data['href'] formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4') - self._sort_formats(formats) for key, value in video_data.items(): mobj = re.match(r'^subtitle_(?P[^_]+)_(?P(vtt|srt))', key) @@ -94,7 +95,7 @@ def _real_extract(self, url): continue subtitles.setdefault(mobj.group('lang'), []).append({ 'url': value, - 'ext': mobj.group('ext') + 'ext': mobj.group('ext'), }) return { @@ -130,7 +131,7 @@ def _real_extract(self, url): 'Downloading playlist info', query={ 'appid': 'viu_desktop', 'fmt': 'json', - 'id': 'playlist-' + playlist_id + 'id': 'playlist-' + playlist_id, })['container'] entries = [] @@ -138,7 +139,7 @@ def _real_extract(self, url): 
item_id = item.get('id') if not item_id: continue - item_id = compat_str(item_id) + item_id = str(item_id) entries.append(self.url_result( 'viu:' + item_id, 'Viu', item_id)) @@ -225,14 +226,14 @@ def _login(self, country_code, video_id): return headers = { 'Authorization': f'Bearer {self._auth_codes[country_code]}', - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', } data = self._download_json( 'https://api-gateway-global.viu.com/api/account/validate', video_id, 'Validating email address', headers=headers, data=json.dumps({ 'principal': username, - 'provider': 'email' + 'provider': 'email', }).encode()) if not data.get('exists'): raise ExtractorError('Invalid email address') @@ -252,7 +253,7 @@ def _login(self, country_code, video_id): return self._user_token def _get_token(self, country_code, video_id): - rand = ''.join(random.choice('0123456789') for _ in range(10)) + rand = ''.join(random.choices('0123456789', k=10)) return self._download_json( f'https://api-gateway-global.viu.com/api/auth/token?v={rand}000', video_id, headers={'Content-Type': 'application/json'}, note='Getting bearer token', @@ -262,8 +263,8 @@ def _get_token(self, country_code, video_id): 'platformFlagLabel': 'web', 'language': 'en', 'uuid': str(uuid.uuid4()), - 'carrierId': '0' - }).encode('utf-8'))['token'] + 'carrierId': '0', + }).encode())['token'] def _real_extract(self, url): url, idata = unsmuggle_url(url, {}) @@ -317,7 +318,7 @@ def download_playback(): headers={ 'Authorization': f'Bearer {self._auth_codes[country_code]}', 'Referer': url, - 'Origin': url + 'Origin': url, }) return self._detect_error(stream_data).get('stream') @@ -363,9 +364,8 @@ def download_playback(): 'url': stream_url, 'height': height, 'ext': 'mp4', - 'filesize': try_get(stream_data, lambda x: x['size'][vid_format], int) + 'filesize': try_get(stream_data, lambda x: x['size'][vid_format], int), }) - self._sort_formats(formats) subtitles = {} for sub in video_data.get('subtitle') or []: @@ 
# Residue of the preceding diff hunk (header remainder and the tail of
# download_playback's return dict), kept verbatim:
# -396,3 +396,146 @@ def download_playback(): 'formats': formats, 'subtitles': subtitles, }


class ViuOTTIndonesiaBaseIE(InfoExtractor):
    """Shared request parameters and token bootstrap for the um.viuapi.io API."""

    # Query parameters sent with the identity request.
    _BASE_QUERY = {
        'ver': 1.0,
        'fmt': 'json',
        'aver': 5.0,
        'appver': 2.0,
        'appid': 'viu_desktop',
        'platform': 'desktop',
    }

    # Both ids are generated once, when the class body is executed, and are
    # then shared by every instance and subclass.
    _DEVICE_ID = str(uuid.uuid4())
    _SESSION_ID = str(uuid.uuid4())
    # Filled in by _real_initialize().
    _TOKEN = None

    # Headers added to every API request made by these extractors.
    _HEADERS = {
        'x-session-id': _SESSION_ID,
        'x-client': 'browser',
    }

    # Maps the page's `internal_age_rating` value to an age limit; the lookup
    # is case-sensitive.
    # NOTE(review): the two keys use different casing - confirm against the
    # values the site actually returns.
    _AGE_RATINGS_MAPPER = {
        'ADULTS': 18,
        'teens': 13,
    }

    def _real_initialize(self):
        """Request an identity token and store it on ViuOTTIndonesiaBaseIE._TOKEN."""
        # Assigned on the base class (not on `self`) so that all subclasses
        # read the same token.
        ViuOTTIndonesiaBaseIE._TOKEN = self._download_json(
            'https://um.viuapi.io/user/identity', None,
            headers={'Content-type': 'application/json', **self._HEADERS},
            query={**self._BASE_QUERY, 'iid': self._DEVICE_ID},
            data=json.dumps({'deviceId': self._DEVICE_ID}).encode(),
            note='Downloading token information')['token']


class ViuOTTIndonesiaIE(ViuOTTIndonesiaBaseIE):
    """Extractor for single videos under https://www.viu.com/ott/<x>/<y>/all/video-...-<id>."""

    # The trailing run of digits is the video id; the group must be named
    # `id` because _real_extract() obtains it through _match_id().
    _VALID_URL = r'https?://www\.viu\.com/ott/\w+/\w+/all/video-[\w-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.viu.com/ott/id/id/all/video-japanese-drama-tv_shows-detective_conan_episode_793-1165863142?containerId=playlist-26271226',
        'info_dict': {
            'id': '1165863142',
            'ext': 'mp4',
            'episode_number': 793,
            'episode': 'Episode 793',
            'title': 'Detective Conan - Episode 793',
            'duration': 1476,
            'description': 'md5:b79d55345bc1e0217ece22616267c9a5',
            'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1165863189/d-1',
            'upload_date': '20210101',
            'timestamp': 1609459200,
        },
    }, {
        'url': 'https://www.viu.com/ott/id/id/all/video-korean-reality-tv_shows-entertainment_weekly_episode_1622-1118617054',
        'info_dict': {
            'id': '1118617054',
            'ext': 'mp4',
            'episode_number': 1622,
            'episode': 'Episode 1622',
            'description': 'md5:6d68ca450004020113e9bf27ad99f0f8',
            'title': 'Entertainment Weekly - Episode 1622',
            'duration': 4729,
            'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1120187848/d-1',
            'timestamp': 1420070400,
            'upload_date': '20150101',
            'cast': ['Shin Hyun-joon', 'Lee Da-Hee'],
        },
    }, {
        # age-limit test
        'url': 'https://www.viu.com/ott/id/id/all/video-japanese-trailer-tv_shows-trailer_jujutsu_kaisen_ver_01-1166044219?containerId=playlist-26273140',
        'info_dict': {
            'id': '1166044219',
            'ext': 'mp4',
            'upload_date': '20200101',
            'timestamp': 1577836800,
            'title': 'Trailer \'Jujutsu Kaisen\' Ver.01',
            'duration': 92,
            'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1166044240/d-1',
            'description': 'Trailer \'Jujutsu Kaisen\' Ver.01',
            'cast': ['Junya Enoki', ' Yûichi Nakamura', ' Yuma Uchida', 'Asami Seto'],
            'age_limit': 13,
        },
    }, {
        # json ld metadata type equal to Movie instead of TVEpisodes
        'url': 'https://www.viu.com/ott/id/id/all/video-japanese-animation-movies-demon_slayer_kimetsu_no_yaiba_the_movie_mugen_train-1165892707?containerId=1675060691786',
        'info_dict': {
            'id': '1165892707',
            'ext': 'mp4',
            'timestamp': 1577836800,
            'upload_date': '20200101',
            'title': 'Demon Slayer - Kimetsu no Yaiba - The Movie: Mugen Train',
            'age_limit': 13,
            'cast': 'count:9',
            'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1165895279/d-1',
            'description': 'md5:1ce9c35a3aeab384085533f746c87469',
            'duration': 7021,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for one video page.

        Formats and subtitles come from the HLS manifest named by the DRM
        content endpoint. Further subtitles and most metadata come from the
        page's `window.__INITIAL_STATE__` JSON, with the page's JSON-LD
        (`TVEpisode` or `Movie` entry) as fallback and as the source of
        thumbnail, timestamp and cast.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        video_data = self._download_json(
            f'https://um.viuapi.io/drm/v1/content/{display_id}', display_id, data=b'',
            headers={'Authorization': ViuOTTIndonesiaBaseIE._TOKEN, **self._HEADERS, 'ccode': 'ID'})
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['playUrl'], display_id)

        initial_state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state',
            display_id)['content']['clipDetails']

        # Keys shaped like `subtitle_<lang>_<ext>` carry subtitle URLs. The
        # loop variable is deliberately not called `url`, so the method
        # argument is not overwritten.
        for key, sub_url in initial_state.items():
            lang, ext = self._search_regex(
                r'^subtitle_(?P<lang>[\w-]+)_(?P<ext>\w+)$', key, 'subtitle metadata',
                default=(None, None), group=('lang', 'ext'))
            if not (lang and ext):
                continue
            subtitles.setdefault(lang, []).append({
                'ext': ext,
                'url': sub_url,
            })
            if ext == 'vtt':
                # Also offer an srt entry whose URL is the vtt URL with its
                # trailing "vtt" swapped for "srt".
                subtitles[lang].append({
                    'ext': 'srt',
                    'url': f'{remove_end(sub_url, "vtt")}srt',
                })

        # First JSON-LD entry describing an episode or a movie, if any.
        episode = next(
            (ld for ld in self._yield_json_ld(webpage, display_id)
             if ld.get('@type') in ('TVEpisode', 'Movie')),
            {})

        return {
            'id': display_id,
            'title': (traverse_obj(initial_state, 'title', 'display_title')
                      or episode.get('name')),
            'description': initial_state.get('description') or episode.get('description'),
            'duration': initial_state.get('duration'),
            'thumbnail': traverse_obj(episode, ('image', 'url')),
            'timestamp': unified_timestamp(episode.get('dateCreated')),
            'formats': formats,
            'subtitles': subtitles,
            'episode_number': (traverse_obj(initial_state, 'episode_no', 'episodeno', expected_type=int_or_none)
                               or int_or_none(episode.get('episodeNumber'))),
            'cast': traverse_obj(episode, ('actor', ..., 'name'), default=None),
            'age_limit': self._AGE_RATINGS_MAPPER.get(initial_state.get('internal_age_rating')),
        }