[ie/matchtv] Fix extractor (#10190)

[yt-dlp.git] / yt_dlp / extractor / ertgr.py
diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py

index 507f0a5c11d79b9efe6c519a8b960add5100e45b..864aa6dc5a6e7fbb31db5e4c5f518ec185a5f2c1 100644 (file)
--- a/yt_dlp/extractor/ertgr.py
+++ b/yt_dlp/extractor/ertgr.py
@@ -2,20 +2,18 @@
  import re
  
  from .common import InfoExtractor
-from ..compat import compat_str
  from ..utils import (
+    ExtractorError,
      clean_html,
      determine_ext,
-    ExtractorError,
      dict_get,
      int_or_none,
      merge_dicts,
-    parse_qs,
      parse_age_limit,
      parse_iso8601,
+    parse_qs,
      str_or_none,
      try_get,
-    unescapeHTML,
      url_or_none,
      variadic,
  )
@@ -31,19 +29,19 @@ def _call_api(
          headers = headers or {}
          if data:
              headers['Content-Type'] = headers_as_param['Content-Type'] = 'application/json;charset=utf-8'
-            data = json.dumps(merge_dicts(platform_codename, data)).encode('utf-8')
+            data = json.dumps(merge_dicts(platform_codename, data)).encode()
          query = merge_dicts(
              {} if data else platform_codename,
              {'$headers': json.dumps(headers_as_param)},
              params)
          response = self._download_json(
-            'https://api.app.ertflix.gr/v%s/%s' % (str(api_version), method),
+            f'https://api.app.ertflix.gr/v{api_version!s}/{method}',
              video_id, fatal=False, query=query, data=data, headers=headers)
          if try_get(response, lambda x: x['Result']['Success']) is True:
              return response
  
      def _call_api_get_tiles(self, video_id, *tile_ids):
-        requested_tile_ids = [video_id] + list(tile_ids)
+        requested_tile_ids = [video_id, *tile_ids]
          requested_tiles = [{'Id': tile_id} for tile_id in requested_tile_ids]
          tiles_response = self._call_api(
              video_id, method='Tile/GetTiles', api_version=2,
@@ -74,7 +72,7 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
      },
      ]
  
-    def _extract_formats_and_subs(self, video_id, allow_none=True):
+    def _extract_formats_and_subs(self, video_id):
          media_info = self._call_api(video_id, codename=video_id)
          formats, subs = [], {}
          for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
@@ -98,8 +96,6 @@ def _extract_formats_and_subs(self, video_id, allow_none=True):
                  formats.extend(formats_)
                  self._merge_subtitles(subs_, target=subs)
  
-        if formats or not allow_none:
-            self._sort_formats(formats)
          return formats, subs
  
      def _real_extract(self, url):
@@ -119,7 +115,7 @@ def _real_extract(self, url):
  class ERTFlixIE(ERTFlixBaseIE):
      IE_NAME = 'ertflix'
      IE_DESC = 'ERTFLIX videos'
-    _VALID_URL = r'https?://www\.ertflix\.gr/(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
+    _VALID_URL = r'https?://www\.ertflix\.gr/(?:[^/]+/)?(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
      _TESTS = [{
          'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
          'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
@@ -171,12 +167,15 @@ class ERTFlixIE(ERTFlixBaseIE):
              'title': 'Το δίκτυο',
          },
          'playlist_mincount': 9,
+    }, {
+        'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari',
+        'only_matching': True,
      }]
  
      def _extract_episode(self, episode):
-        codename = try_get(episode, lambda x: x['Codename'], compat_str)
+        codename = try_get(episode, lambda x: x['Codename'], str)
          title = episode.get('Title')
-        description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription', )))
+        description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription')))
          if not codename or not title or not episode.get('HasPlayableStream', True):
              return
          thumbnail = next((
@@ -195,7 +194,7 @@ def _extract_episode(self, episode):
              'timestamp': parse_iso8601(episode.get('PublishDate')),
              'duration': episode.get('DurationSeconds'),
              'age_limit': self._parse_age_rating(episode),
-            'url': 'ertflix:%s' % (codename, ),
+            'url': f'ertflix:{codename}',
          }
  
      @staticmethod
@@ -212,7 +211,7 @@ def _extract_series(self, video_id, season_titles=None, season_numbers=None):
          series_info = {
              'age_limit': self._parse_age_rating(series),
              'title': series.get('Title'),
-            'description': dict_get(series, ('ShortDescription', 'TinyDescription', )),
+            'description': dict_get(series, ('ShortDescription', 'TinyDescription')),
          }
          if season_numbers:
              season_titles = season_titles or []
@@ -272,6 +271,7 @@ class ERTWebtvEmbedIE(InfoExtractor):
      IE_DESC = 'ert.gr webtv embedded videos'
      _BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
      _VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'
+    _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>(?:https?:)?{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
  
      _TESTS = [{
          'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
@@ -280,27 +280,15 @@ class ERTWebtvEmbedIE(InfoExtractor):
              'id': 'trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4',
              'title': 'md5:914f06a73cd8b62fbcd6fb90c636e497',
              'ext': 'mp4',
-            'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg'
+            'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
          },
      }]
  
-    @classmethod
-    def _extract_urls(cls, webpage):
-        EMBED_URL_RE = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
-        EMBED_RE = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{EMBED_URL_RE})(?P=_q1)'
-
-        for mobj in re.finditer(EMBED_RE, webpage):
-            url = unescapeHTML(mobj.group('url'))
-            if not cls.suitable(url):
-                continue
-            yield url
-
      def _real_extract(self, url):
          video_id = self._match_id(url)
          formats, subs = self._extract_m3u8_formats_and_subtitles(
              f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8',
              video_id, 'mp4')
-        self._sort_formats(formats)
          thumbnail_id = parse_qs(url).get('bgimg', [None])[0]
          if thumbnail_id and not thumbnail_id.startswith('http'):
              thumbnail_id = f'https://program.ert.gr{thumbnail_id}'