[ie/mlbtv] Fix extraction (#10296)

[yt-dlp.git] / yt_dlp / extractor / viu.py
diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py

index d27091c94e78643c320518b651ebbdfae01fc0f5..01e59352bf33927a313865d415ebc0f531aa9d40 100644 (file)
--- a/yt_dlp/extractor/viu.py
+++ b/yt_dlp/extractor/viu.py
@@ -1,17 +1,19 @@
-import re
  import json
-import uuid
  import random
+import re
  import urllib.parse
+import uuid
  
  from .common import InfoExtractor
-from ..compat import compat_str
  from ..utils import (
      ExtractorError,
      int_or_none,
+    remove_end,
+    smuggle_url,
      strip_or_none,
+    traverse_obj,
      try_get,
-    smuggle_url,
+    unified_timestamp,
      unsmuggle_url,
      url_or_none,
  )
@@ -65,7 +67,7 @@ def _real_extract(self, url):
              'clip/load', video_id, 'Downloading video data', query={
                  'appid': 'viu_desktop',
                  'fmt': 'json',
-                'id': video_id
+                'id': video_id,
              })['item'][0]
  
          title = video_data['title']
@@ -79,14 +81,13 @@ def _real_extract(self, url):
          # hls_file = video_data.get('hlsfile')
          hls_file = video_data.get('jwhlsfile')
          if url_path and tdirforwhole and hls_file:
-            m3u8_url = '%s/%s/%s' % (url_path, tdirforwhole, hls_file)
+            m3u8_url = f'{url_path}/{tdirforwhole}/{hls_file}'
          else:
              # m3u8_url = re.sub(
              #     r'(/hlsc_)[a-z]+(\d+\.m3u8)',
              #     r'\1whe\2', video_data['href'])
              m3u8_url = video_data['href']
          formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
-        self._sort_formats(formats)
  
          for key, value in video_data.items():
              mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
@@ -94,7 +95,7 @@ def _real_extract(self, url):
                  continue
              subtitles.setdefault(mobj.group('lang'), []).append({
                  'url': value,
-                'ext': mobj.group('ext')
+                'ext': mobj.group('ext'),
              })
  
          return {
@@ -130,7 +131,7 @@ def _real_extract(self, url):
              'Downloading playlist info', query={
                  'appid': 'viu_desktop',
                  'fmt': 'json',
-                'id': 'playlist-' + playlist_id
+                'id': 'playlist-' + playlist_id,
              })['container']
  
          entries = []
@@ -138,7 +139,7 @@ def _real_extract(self, url):
              item_id = item.get('id')
              if not item_id:
                  continue
-            item_id = compat_str(item_id)
+            item_id = str(item_id)
              entries.append(self.url_result(
                  'viu:' + item_id, 'Viu', item_id))
  
@@ -225,14 +226,14 @@ def _login(self, country_code, video_id):
                  return
              headers = {
                  'Authorization': f'Bearer {self._auth_codes[country_code]}',
-                'Content-Type': 'application/json'
+                'Content-Type': 'application/json',
              }
              data = self._download_json(
                  'https://api-gateway-global.viu.com/api/account/validate',
                  video_id, 'Validating email address', headers=headers,
                  data=json.dumps({
                      'principal': username,
-                    'provider': 'email'
+                    'provider': 'email',
                  }).encode())
              if not data.get('exists'):
                  raise ExtractorError('Invalid email address')
@@ -252,7 +253,7 @@ def _login(self, country_code, video_id):
          return self._user_token
  
      def _get_token(self, country_code, video_id):
-        rand = ''.join(random.choice('0123456789') for _ in range(10))
+        rand = ''.join(random.choices('0123456789', k=10))
          return self._download_json(
              f'https://api-gateway-global.viu.com/api/auth/token?v={rand}000', video_id,
              headers={'Content-Type': 'application/json'}, note='Getting bearer token',
@@ -262,8 +263,8 @@ def _get_token(self, country_code, video_id):
                  'platformFlagLabel': 'web',
                  'language': 'en',
                  'uuid': str(uuid.uuid4()),
-                'carrierId': '0'
-            }).encode('utf-8'))['token']
+                'carrierId': '0',
+            }).encode())['token']
  
      def _real_extract(self, url):
          url, idata = unsmuggle_url(url, {})
@@ -317,7 +318,7 @@ def download_playback():
                  headers={
                      'Authorization': f'Bearer {self._auth_codes[country_code]}',
                      'Referer': url,
-                    'Origin': url
+                    'Origin': url,
                  })
              return self._detect_error(stream_data).get('stream')
  
@@ -363,9 +364,8 @@ def download_playback():
                  'url': stream_url,
                  'height': height,
                  'ext': 'mp4',
-                'filesize': try_get(stream_data, lambda x: x['size'][vid_format], int)
+                'filesize': try_get(stream_data, lambda x: x['size'][vid_format], int),
              })
-        self._sort_formats(formats)
  
          subtitles = {}
          for sub in video_data.get('subtitle') or []:
@@ -396,3 +396,146 @@ def download_playback():
              'formats': formats,
              'subtitles': subtitles,
          }
+
+
+class ViuOTTIndonesiaBaseIE(InfoExtractor):  # shared constants + token setup; subclassed by ViuOTTIndonesiaIE
+    _BASE_QUERY = {  # query parameters included in the identity request made by _real_initialize
+        'ver': 1.0,
+        'fmt': 'json',
+        'aver': 5.0,
+        'appver': 2.0,
+        'appid': 'viu_desktop',
+        'platform': 'desktop',
+    }
+    # Random UUIDs created once, when this class body is executed
+    _DEVICE_ID = str(uuid.uuid4())  # sent as the 'iid' query value and the 'deviceId' body field
+    _SESSION_ID = str(uuid.uuid4())  # sent as the 'x-session-id' header
+    _TOKEN = None  # assigned on ViuOTTIndonesiaBaseIE itself by _real_initialize
+    # Headers merged into the API requests (identity request here, content request in ViuOTTIndonesiaIE)
+    _HEADERS = {
+        'x-session-id': _SESSION_ID,
+        'x-client': 'browser',
+    }
+    # 'internal_age_rating' value -> age_limit (looked up in ViuOTTIndonesiaIE._real_extract)
+    _AGE_RATINGS_MAPPER = {
+        'ADULTS': 18,
+        'teens': 13,
+    }
+
+    def _real_initialize(self):  # POST the device id to the identity endpoint and store the returned 'token'
+        ViuOTTIndonesiaBaseIE._TOKEN = self._download_json(
+            'https://um.viuapi.io/user/identity', None,
+            headers={'Content-type': 'application/json', **self._HEADERS},
+            query={**self._BASE_QUERY, 'iid': self._DEVICE_ID},
+            data=json.dumps({'deviceId': self._DEVICE_ID}).encode(),
+            note='Downloading token information')['token']  # KeyError if the response has no 'token'
+
+
+class ViuOTTIndonesiaIE(ViuOTTIndonesiaBaseIE):
+    _VALID_URL = r'https?://www\.viu\.com/ott/\w+/\w+/all/video-[\w-]+-(?P<id>\d+)'  # trailing digits are the id
+    _TESTS = [{
+        'url': 'https://www.viu.com/ott/id/id/all/video-japanese-drama-tv_shows-detective_conan_episode_793-1165863142?containerId=playlist-26271226',
+        'info_dict': {
+            'id': '1165863142',
+            'ext': 'mp4',
+            'episode_number': 793,
+            'episode': 'Episode 793',
+            'title': 'Detective Conan - Episode 793',
+            'duration': 1476,
+            'description': 'md5:b79d55345bc1e0217ece22616267c9a5',
+            'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1165863189/d-1',
+            'upload_date': '20210101',
+            'timestamp': 1609459200,
+        },
+    }, {
+        'url': 'https://www.viu.com/ott/id/id/all/video-korean-reality-tv_shows-entertainment_weekly_episode_1622-1118617054',
+        'info_dict': {
+            'id': '1118617054',
+            'ext': 'mp4',
+            'episode_number': 1622,
+            'episode': 'Episode 1622',
+            'description': 'md5:6d68ca450004020113e9bf27ad99f0f8',
+            'title': 'Entertainment Weekly - Episode 1622',
+            'duration': 4729,
+            'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1120187848/d-1',
+            'timestamp': 1420070400,
+            'upload_date': '20150101',
+            'cast': ['Shin Hyun-joon', 'Lee Da-Hee'],
+        },
+    }, {
+        # age-limit test
+        'url': 'https://www.viu.com/ott/id/id/all/video-japanese-trailer-tv_shows-trailer_jujutsu_kaisen_ver_01-1166044219?containerId=playlist-26273140',
+        'info_dict': {
+            'id': '1166044219',
+            'ext': 'mp4',
+            'upload_date': '20200101',
+            'timestamp': 1577836800,
+            'title': 'Trailer \'Jujutsu Kaisen\' Ver.01',
+            'duration': 92,
+            'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1166044240/d-1',
+            'description': 'Trailer \'Jujutsu Kaisen\' Ver.01',
+            'cast': ['Junya Enoki', ' Yûichi Nakamura', ' Yuma Uchida', 'Asami Seto'],
+            'age_limit': 13,
+        },
+    }, {
+        # json ld metadata type equal to Movie instead of TVEpisodes
+        'url': 'https://www.viu.com/ott/id/id/all/video-japanese-animation-movies-demon_slayer_kimetsu_no_yaiba_the_movie_mugen_train-1165892707?containerId=1675060691786',
+        'info_dict': {
+            'id': '1165892707',
+            'ext': 'mp4',
+            'timestamp': 1577836800,
+            'upload_date': '20200101',
+            'title': 'Demon Slayer - Kimetsu no Yaiba - The Movie: Mugen Train',
+            'age_limit': 13,
+            'cast': 'count:9',
+            'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1165895279/d-1',
+            'description': 'md5:1ce9c35a3aeab384085533f746c87469',
+            'duration': 7021,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict for one video page: m3u8 formats, subtitles and page/JSON-LD metadata."""
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        video_data = self._download_json(
+            f'https://um.viuapi.io/drm/v1/content/{display_id}', display_id, data=b'',
+            headers={'Authorization': ViuOTTIndonesiaBaseIE._TOKEN, **self._HEADERS, 'ccode': 'ID'})
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['playUrl'], display_id)
+
+        initial_state = self._search_json(
+            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', display_id)['content']['clipDetails']
+        for key, sub_url in initial_state.items():  # sub_url: do not rebind the `url` parameter
+            lang, ext = self._search_regex(
+                r'^subtitle_(?P<lang>[\w-]+)_(?P<ext>\w+)$', key, 'subtitle metadata',
+                default=(None, None), group=('lang', 'ext'))
+            if lang and ext:
+                subtitles.setdefault(lang, []).append({
+                    'ext': ext,
+                    'url': sub_url,
+                })
+                # For each vtt entry, also list the same URL with the trailing "vtt" swapped for "srt"
+                if ext == 'vtt':
+                    subtitles[lang].append({
+                        'ext': 'srt',
+                        'url': f'{remove_end(sub_url, "vtt")}srt',
+                    })
+
+        # First JSON-LD object whose @type is TVEpisode or Movie; {} when the page has none
+        episode = traverse_obj(list(filter(
+            lambda x: x.get('@type') in ('TVEpisode', 'Movie'), self._yield_json_ld(webpage, display_id))), 0) or {}
+        return {
+            'id': display_id,
+            'title': traverse_obj(initial_state, 'title', 'display_title') or episode.get('name'),
+            'description': initial_state.get('description') or episode.get('description'),
+            'duration': initial_state.get('duration'),
+            'thumbnail': traverse_obj(episode, ('image', 'url')),
+            'timestamp': unified_timestamp(episode.get('dateCreated')),
+            'formats': formats,
+            'subtitles': subtitles,
+            'episode_number': (traverse_obj(initial_state, 'episode_no', 'episodeno', expected_type=int_or_none)
+                               or int_or_none(episode.get('episodeNumber'))),
+            'cast': traverse_obj(episode, ('actor', ..., 'name'), default=None),
+            'age_limit': self._AGE_RATINGS_MAPPER.get(initial_state.get('internal_age_rating')),
+        }