[ie/matchtv] Fix extractor (#10190)

[yt-dlp.git] / yt_dlp / extractor / rcti.py
diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py

index 0cfecbc9a8301602900b6097373d22acc373aee0..61b73a550c308413ac4341de0a911444841f2977 100644 (file)
--- a/yt_dlp/extractor/rcti.py
+++ b/yt_dlp/extractor/rcti.py
@@ -3,13 +3,13 @@
  import time
  
  from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
  from ..utils import (
-    dict_get,
      ExtractorError,
+    dict_get,
      strip_or_none,
      traverse_obj,
-    try_get
+    try_get,
  )
  
  
@@ -28,7 +28,7 @@ def _call_api(self, url, video_id, note=None):
  
  
  class RCTIPlusIE(RCTIPlusBaseIE):
-    _VALID_URL = r'https://www\.rctiplus\.com/(?:programs/\d+?/.*?/)?(?P<type>episode|clip|extra|live-event|missed-event)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+    _VALID_URL = r'https?://www\.rctiplus\.com/(?:programs/\d+?/.*?/)?(?P<type>episode|clip|extra|live-event|missed-event)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
      _TESTS = [{
          'url': 'https://www.rctiplus.com/programs/1259/kiko-untuk-lola/episode/22124/untuk-lola',
          'md5': '56ed45affad45fa18d5592a1bc199997',
@@ -154,39 +154,39 @@ def _real_extract(self, url):
              is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['start_date'])
          if is_upcoming:
              self.raise_no_formats(
-                'This event will start at %s.' % video_json['live_label'] if video_json.get('live_label') else 'This event has not started yet.', expected=True)
+                'This event will start at {}.'.format(video_json['live_label']) if video_json.get('live_label') else 'This event has not started yet.', expected=True)
          if 'akamaized' in video_url:
              # For some videos hosted on Akamai's CDN (possibly AES-encrypted ones?), a session needs to at least be made via Conviva's API
              conviva_json_data = {
                  **self._CONVIVA_JSON_TEMPLATE,
                  'url': video_url,
-                'sst': int(time.time())
+                'sst': int(time.time()),
              }
              conviva_json_res = self._download_json(
                  'https://ff84ae928c3b33064b76dec08f12500465e59a6f.cws.conviva.com/0/wsg', display_id,
                  'Creating Conviva session', 'Failed to create Conviva session',
-                fatal=False, data=json.dumps(conviva_json_data).encode('utf-8'))
+                fatal=False, data=json.dumps(conviva_json_data).encode())
              if conviva_json_res and conviva_json_res.get('err') != 'ok':
-                self.report_warning('Conviva said: %s' % str(conviva_json_res.get('err')))
+                self.report_warning('Conviva said: {}'.format(str(conviva_json_res.get('err'))))
  
          video_meta, meta_paths = self._call_api(
-            'https://api.rctiplus.com/api/v1/%s/%s' % (video_type, video_id), display_id, 'Downloading video metadata')
+            f'https://api.rctiplus.com/api/v1/{video_type}/{video_id}', display_id, 'Downloading video metadata')
  
          thumbnails, image_path = [], meta_paths.get('image_path', 'https://rstatic.akamaized.net/media/')
          if video_meta.get('portrait_image'):
              thumbnails.append({
                  'id': 'portrait_image',
-                'url': '%s%d%s' % (image_path, 2000, video_meta['portrait_image'])  # 2000px seems to be the highest resolution that can be given
+                'url': '{}{}{}'.format(image_path, 2000, video_meta['portrait_image']),  # 2000px seems to be the highest resolution that can be given
              })
          if video_meta.get('landscape_image'):
              thumbnails.append({
                  'id': 'landscape_image',
-                'url': '%s%d%s' % (image_path, 2000, video_meta['landscape_image'])
+                'url': '{}{}{}'.format(image_path, 2000, video_meta['landscape_image']),
              })
          try:
              formats = self._extract_m3u8_formats(video_url, display_id, 'mp4', headers={'Referer': 'https://www.rctiplus.com/'})
          except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                  self.raise_geo_restricted(countries=['ID'], metadata_available=True)
              else:
                  raise e
@@ -194,8 +194,6 @@ def _real_extract(self, url):
              if 'akamaized' in f['url'] or 'cloudfront' in f['url']:
                  f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/'  # Referer header is required for akamai/cloudfront CDNs
  
-        self._sort_formats(formats)
-
          return {
              'id': video_meta.get('product_id') or video_json.get('product_id'),
              'title': dict_get(video_meta, ('title', 'name')) or dict_get(video_json, ('content_name', 'assets_name')),
@@ -220,7 +218,7 @@ def _real_extract(self, url):
  
  
  class RCTIPlusSeriesIE(RCTIPlusBaseIE):
-    _VALID_URL = r'https://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)(?:/(?P<type>episodes|extras|clips))?'
+    _VALID_URL = r'https?://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)(?:/(?P<type>episodes|extras|clips))?'
      _TESTS = [{
          'url': 'https://www.rctiplus.com/programs/829/putri-untuk-pangeran',
          'playlist_mincount': 1019,
@@ -231,7 +229,7 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
              'age_limit': 2,
              'cast': ['Verrel Bramasta', 'Ranty Maria', 'Riza Syah', 'Ivan Fadilla', 'Nicole Parham', 'Dll', 'Aviv Elham'],
              'display_id': 'putri-untuk-pangeran',
-            'tag': 'count:18',
+            'tags': 'count:18',
          },
      }, {  # No episodes
          'url': 'https://www.rctiplus.com/programs/615/inews-pagi',
@@ -241,9 +239,9 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
              'title': 'iNews Pagi',
              'description': 'md5:f18ee3d4643cfb41c358e5a9b693ee04',
              'age_limit': 2,
-            'tag': 'count:11',
+            'tags': 'count:11',
              'display_id': 'inews-pagi',
-        }
+        },
      }]
      _AGE_RATINGS = {  # Based off https://id.wikipedia.org/wiki/Sistem_rating_konten_televisi with additional ratings
          'S-SU': 2,
@@ -257,13 +255,13 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
  
      @classmethod
      def suitable(cls, url):
-        return False if RCTIPlusIE.suitable(url) else super(RCTIPlusSeriesIE, cls).suitable(url)
+        return False if RCTIPlusIE.suitable(url) else super().suitable(url)
  
      def _entries(self, url, display_id=None, note='Downloading entries JSON', metadata={}):
          total_pages = 0
          try:
              total_pages = self._call_api(
-                '%s&length=20&page=0' % url,
+                f'{url}&length=20&page=0',
                  display_id, note)[1]['pagination']['total_page']
          except ExtractorError as e:
              if 'not found' in str(e):
@@ -274,8 +272,8 @@ def _entries(self, url, display_id=None, note='Downloading entries JSON', metada
  
          for page_num in range(1, total_pages + 1):
              episode_list = self._call_api(
-                '%s&length=20&page=%s' % (url, page_num),
-                display_id, '%s page %s' % (note, page_num))[0] or []
+                f'{url}&length=20&page={page_num}',
+                display_id, f'{note} page {page_num}')[0] or []
  
              for video_json in episode_list:
                  yield {
@@ -290,7 +288,7 @@ def _entries(self, url, display_id=None, note='Downloading entries JSON', metada
                      'duration': video_json.get('duration'),
                      'season_number': video_json.get('season'),
                      'episode_number': video_json.get('episode'),
-                    **metadata
+                    **metadata,
                  }
  
      def _series_entries(self, series_id, display_id=None, video_type=None, metadata={}):
@@ -329,8 +327,8 @@ def _real_extract(self, url):
              'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']]),
              'cast': traverse_obj(series_meta, (('starring', 'creator', 'writer'), ..., 'name'),
                                   expected_type=lambda x: strip_or_none(x) or None),
-            'tag': traverse_obj(series_meta, ('tag', ..., 'name'),
-                                expected_type=lambda x: strip_or_none(x) or None),
+            'tags': traverse_obj(series_meta, ('tag', ..., 'name'),
+                                 expected_type=lambda x: strip_or_none(x) or None),
          }
          return self.playlist_result(
              self._series_entries(series_id, display_id, video_type, metadata), series_id,
@@ -338,7 +336,7 @@ def _real_extract(self, url):
  
  
  class RCTIPlusTVIE(RCTIPlusBaseIE):
-    _VALID_URL = r'https://www\.rctiplus\.com/((tv/(?P<tvname>\w+))|(?P<eventname>live-event|missed-event))'
+    _VALID_URL = r'https?://www\.rctiplus\.com/((tv/(?P<tvname>\w+))|(?P<eventname>live-event|missed-event))'
      _TESTS = [{
          'url': 'https://www.rctiplus.com/tv/rcti',
          'info_dict': {
@@ -350,7 +348,7 @@ class RCTIPlusTVIE(RCTIPlusBaseIE):
          },
          'params': {
              'skip_download': True,
-        }
+        },
      }, {
          # Returned video will always change
          'url': 'https://www.rctiplus.com/live-event',
@@ -363,7 +361,7 @@ class RCTIPlusTVIE(RCTIPlusBaseIE):
  
      @classmethod
      def suitable(cls, url):
-        return False if RCTIPlusIE.suitable(url) else super(RCTIPlusTVIE, cls).suitable(url)
+        return False if RCTIPlusIE.suitable(url) else super().suitable(url)
  
      def _real_extract(self, url):
          match = self._match_valid_url(url).groupdict()