]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/ertgr.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / ertgr.py
index 507f0a5c11d79b9efe6c519a8b960add5100e45b..864aa6dc5a6e7fbb31db5e4c5f518ec185a5f2c1 100644 (file)
@@ -2,20 +2,18 @@
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
+    ExtractorError,
     clean_html,
     determine_ext,
-    ExtractorError,
     dict_get,
     int_or_none,
     merge_dicts,
-    parse_qs,
     parse_age_limit,
     parse_iso8601,
+    parse_qs,
     str_or_none,
     try_get,
-    unescapeHTML,
     url_or_none,
     variadic,
 )
@@ -31,19 +29,19 @@ def _call_api(
         headers = headers or {}
         if data:
             headers['Content-Type'] = headers_as_param['Content-Type'] = 'application/json;charset=utf-8'
-            data = json.dumps(merge_dicts(platform_codename, data)).encode('utf-8')
+            data = json.dumps(merge_dicts(platform_codename, data)).encode()
         query = merge_dicts(
             {} if data else platform_codename,
             {'$headers': json.dumps(headers_as_param)},
             params)
         response = self._download_json(
-            'https://api.app.ertflix.gr/v%s/%s' % (str(api_version), method),
+            f'https://api.app.ertflix.gr/v{api_version!s}/{method}',
             video_id, fatal=False, query=query, data=data, headers=headers)
         if try_get(response, lambda x: x['Result']['Success']) is True:
             return response
 
     def _call_api_get_tiles(self, video_id, *tile_ids):
-        requested_tile_ids = [video_id] + list(tile_ids)
+        requested_tile_ids = [video_id, *tile_ids]
         requested_tiles = [{'Id': tile_id} for tile_id in requested_tile_ids]
         tiles_response = self._call_api(
             video_id, method='Tile/GetTiles', api_version=2,
@@ -74,7 +72,7 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
     },
     ]
 
-    def _extract_formats_and_subs(self, video_id, allow_none=True):
+    def _extract_formats_and_subs(self, video_id):
         media_info = self._call_api(video_id, codename=video_id)
         formats, subs = [], {}
         for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
@@ -98,8 +96,6 @@ def _extract_formats_and_subs(self, video_id, allow_none=True):
                 formats.extend(formats_)
                 self._merge_subtitles(subs_, target=subs)
 
-        if formats or not allow_none:
-            self._sort_formats(formats)
         return formats, subs
 
     def _real_extract(self, url):
@@ -119,7 +115,7 @@ def _real_extract(self, url):
 class ERTFlixIE(ERTFlixBaseIE):
     IE_NAME = 'ertflix'
     IE_DESC = 'ERTFLIX videos'
-    _VALID_URL = r'https?://www\.ertflix\.gr/(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
+    _VALID_URL = r'https?://www\.ertflix\.gr/(?:[^/]+/)?(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
     _TESTS = [{
         'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
         'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
@@ -171,12 +167,15 @@ class ERTFlixIE(ERTFlixBaseIE):
             'title': 'Το δίκτυο',
         },
         'playlist_mincount': 9,
+    }, {
+        'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari',
+        'only_matching': True,
     }]
 
     def _extract_episode(self, episode):
-        codename = try_get(episode, lambda x: x['Codename'], compat_str)
+        codename = try_get(episode, lambda x: x['Codename'], str)
         title = episode.get('Title')
-        description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription')))
+        description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription')))
         if not codename or not title or not episode.get('HasPlayableStream', True):
             return
         thumbnail = next((
@@ -195,7 +194,7 @@ def _extract_episode(self, episode):
             'timestamp': parse_iso8601(episode.get('PublishDate')),
             'duration': episode.get('DurationSeconds'),
             'age_limit': self._parse_age_rating(episode),
-            'url': 'ertflix:%s' % (codename, ),
+            'url': f'ertflix:{codename}',
         }
 
     @staticmethod
@@ -212,7 +211,7 @@ def _extract_series(self, video_id, season_titles=None, season_numbers=None):
         series_info = {
             'age_limit': self._parse_age_rating(series),
             'title': series.get('Title'),
-            'description': dict_get(series, ('ShortDescription', 'TinyDescription')),
+            'description': dict_get(series, ('ShortDescription', 'TinyDescription')),
         }
         if season_numbers:
             season_titles = season_titles or []
@@ -272,6 +271,7 @@ class ERTWebtvEmbedIE(InfoExtractor):
     IE_DESC = 'ert.gr webtv embedded videos'
     _BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
     _VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'
+    _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>(?:https?:)?{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
 
     _TESTS = [{
         'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
@@ -280,27 +280,15 @@ class ERTWebtvEmbedIE(InfoExtractor):
             'id': 'trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4',
             'title': 'md5:914f06a73cd8b62fbcd6fb90c636e497',
             'ext': 'mp4',
-            'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg'
+            'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
         },
     }]
 
-    @classmethod
-    def _extract_urls(cls, webpage):
-        EMBED_URL_RE = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
-        EMBED_RE = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{EMBED_URL_RE})(?P=_q1)'
-
-        for mobj in re.finditer(EMBED_RE, webpage):
-            url = unescapeHTML(mobj.group('url'))
-            if not cls.suitable(url):
-                continue
-            yield url
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
         formats, subs = self._extract_m3u8_formats_and_subtitles(
             f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8',
             video_id, 'mp4')
-        self._sort_formats(formats)
         thumbnail_id = parse_qs(url).get('bgimg', [None])[0]
         if thumbnail_id and not thumbnail_id.startswith('http'):
             thumbnail_id = f'https://program.ert.gr{thumbnail_id}'