yt_dlp/extractor/ertgr.py

   1 import json
   2 import re
   3
   4 from .common import InfoExtractor
   5 from ..compat import compat_str
   6 from ..utils import (
   7     clean_html,
   8     determine_ext,
   9     ExtractorError,
  10     dict_get,
  11     int_or_none,
  12     merge_dicts,
  13     parse_qs,
  14     parse_age_limit,
  15     parse_iso8601,
  16     str_or_none,
  17     try_get,
  18     unescapeHTML,
  19     url_or_none,
  20     variadic,
  21 )
  22
  23
  24 class ERTFlixBaseIE(InfoExtractor):
  25     def _call_api(
  26             self, video_id, method='Player/AcquireContent', api_version=1,
  27             param_headers=None, data=None, headers=None, **params):
  28         platform_codename = {'platformCodename': 'www'}
  29         headers_as_param = {'X-Api-Date-Format': 'iso', 'X-Api-Camel-Case': False}
  30         headers_as_param.update(param_headers or {})
  31         headers = headers or {}
  32         if data:
  33             headers['Content-Type'] = headers_as_param['Content-Type'] = 'application/json;charset=utf-8'
  34             data = json.dumps(merge_dicts(platform_codename, data)).encode('utf-8')
  35         query = merge_dicts(
  36             {} if data else platform_codename,
  37             {'$headers': json.dumps(headers_as_param)},
  38             params)
  39         response = self._download_json(
  40             'https://api.app.ertflix.gr/v%s/%s' % (str(api_version), method),
  41             video_id, fatal=False, query=query, data=data, headers=headers)
  42         if try_get(response, lambda x: x['Result']['Success']) is True:
  43             return response
  44
  45     def _call_api_get_tiles(self, video_id, *tile_ids):
  46         requested_tile_ids = [video_id] + list(tile_ids)
  47         requested_tiles = [{'Id': tile_id} for tile_id in requested_tile_ids]
  48         tiles_response = self._call_api(
  49             video_id, method='Tile/GetTiles', api_version=2,
  50             data={'RequestedTiles': requested_tiles})
  51         tiles = try_get(tiles_response, lambda x: x['Tiles'], list) or []
  52         if tile_ids:
  53             if sorted([tile['Id'] for tile in tiles]) != sorted(requested_tile_ids):
  54                 raise ExtractorError('Requested tiles not found', video_id=video_id)
  55             return tiles
  56         try:
  57             return next(tile for tile in tiles if tile['Id'] == video_id)
  58         except StopIteration:
  59             raise ExtractorError('No matching tile found', video_id=video_id)
  60
  61
  62 class ERTFlixCodenameIE(ERTFlixBaseIE):
  63     IE_NAME = 'ertflix:codename'
  64     IE_DESC = 'ERTFLIX videos by codename'
  65     _VALID_URL = r'ertflix:(?P<id>[\w-]+)'
  66     _TESTS = [{
  67         'url': 'ertflix:monogramma-praxitelis-tzanoylinos',
  68         'md5': '5b9c2cd171f09126167e4082fc1dd0ef',
  69         'info_dict': {
  70             'id': 'monogramma-praxitelis-tzanoylinos',
  71             'ext': 'mp4',
  72             'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e',
  73         },
  74     },
  75     ]
  76
  77     def _extract_formats_and_subs(self, video_id, allow_none=True):
  78         media_info = self._call_api(video_id, codename=video_id)
  79         formats, subs = [], {}
  80         for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
  81             for media in try_get(media_file, lambda x: x['Formats'], list) or []:
  82                 fmt_url = url_or_none(try_get(media, lambda x: x['Url']))
  83                 if not fmt_url:
  84                     continue
  85                 ext = determine_ext(fmt_url)
  86                 if ext == 'm3u8':
  87                     formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
  88                         fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
  89                 elif ext == 'mpd':
  90                     formats_, subs_ = self._extract_mpd_formats_and_subtitles(
  91                         fmt_url, video_id, mpd_id='dash', fatal=False)
  92                 else:
  93                     formats.append({
  94                         'url': fmt_url,
  95                         'format_id': str_or_none(media.get('Id')),
  96                     })
  97                     continue
  98                 formats.extend(formats_)
  99                 self._merge_subtitles(subs_, target=subs)
 100
 101         if formats or not allow_none:
 102             self._sort_formats(formats)
 103         return formats, subs
 104
 105     def _real_extract(self, url):
 106         video_id = self._match_id(url)
 107
 108         formats, subs = self._extract_formats_and_subs(video_id)
 109
 110         if formats:
 111             return {
 112                 'id': video_id,
 113                 'formats': formats,
 114                 'subtitles': subs,
 115                 'title': self._generic_title(url),
 116             }
 117
 118
 119 class ERTFlixIE(ERTFlixBaseIE):
 120     IE_NAME = 'ertflix'
 121     IE_DESC = 'ERTFLIX videos'
 122     _VALID_URL = r'https?://www\.ertflix\.gr/(?:[^/]+/)?(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
 123     _TESTS = [{
 124         'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
 125         'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
 126         'info_dict': {
 127             'id': 'aoratoi-ergates',
 128             'ext': 'mp4',
 129             'title': 'md5:c1433d598fbba0211b0069021517f8b4',
 130             'description': 'md5:01a64d113c31957eb7eb07719ab18ff4',
 131             'thumbnail': r're:https?://.+\.jpg',
 132             'episode_id': 'vod.173258',
 133             'timestamp': 1639648800,
 134             'upload_date': '20211216',
 135             'duration': 3166,
 136             'age_limit': 8,
 137         },
 138     }, {
 139         'url': 'https://www.ertflix.gr/series/ser.3448-monogramma',
 140         'info_dict': {
 141             'id': 'ser.3448',
 142             'age_limit': 8,
 143             'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
 144             'title': 'Μονόγραμμα',
 145         },
 146         'playlist_mincount': 64,
 147     }, {
 148         'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1',
 149         'info_dict': {
 150             'id': 'ser.3448',
 151             'age_limit': 8,
 152             'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
 153             'title': 'Μονόγραμμα',
 154         },
 155         'playlist_count': 22,
 156     }, {
 157         'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022',
 158         'info_dict': {
 159             'id': 'ser.3448',
 160             'age_limit': 8,
 161             'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
 162             'title': 'Μονόγραμμα',
 163         },
 164         'playlist_mincount': 36,
 165     }, {
 166         'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9',
 167         'info_dict': {
 168             'id': 'ser.164991',
 169             'age_limit': 8,
 170             'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.',
 171             'title': 'Το δίκτυο',
 172         },
 173         'playlist_mincount': 9,
 174     }, {
 175         'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari',
 176         'only_matching': True,
 177     }]
 178
 179     def _extract_episode(self, episode):
 180         codename = try_get(episode, lambda x: x['Codename'], compat_str)
 181         title = episode.get('Title')
 182         description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription', )))
 183         if not codename or not title or not episode.get('HasPlayableStream', True):
 184             return
 185         thumbnail = next((
 186             url_or_none(thumb.get('Url'))
 187             for thumb in variadic(dict_get(episode, ('Images', 'Image')) or {})
 188             if thumb.get('IsMain')),
 189             None)
 190         return {
 191             '_type': 'url_transparent',
 192             'thumbnail': thumbnail,
 193             'id': codename,
 194             'episode_id': episode.get('Id'),
 195             'title': title,
 196             'alt_title': episode.get('Subtitle'),
 197             'description': description,
 198             'timestamp': parse_iso8601(episode.get('PublishDate')),
 199             'duration': episode.get('DurationSeconds'),
 200             'age_limit': self._parse_age_rating(episode),
 201             'url': 'ertflix:%s' % (codename, ),
 202         }
 203
 204     @staticmethod
 205     def _parse_age_rating(info_dict):
 206         return parse_age_limit(
 207             info_dict.get('AgeRating')
 208             or (info_dict.get('IsAdultContent') and 18)
 209             or (info_dict.get('IsKidsContent') and 0))
 210
 211     def _extract_series(self, video_id, season_titles=None, season_numbers=None):
 212         media_info = self._call_api(video_id, method='Tile/GetSeriesDetails', id=video_id)
 213
 214         series = try_get(media_info, lambda x: x['Series'], dict) or {}
 215         series_info = {
 216             'age_limit': self._parse_age_rating(series),
 217             'title': series.get('Title'),
 218             'description': dict_get(series, ('ShortDescription', 'TinyDescription', )),
 219         }
 220         if season_numbers:
 221             season_titles = season_titles or []
 222             for season in try_get(series, lambda x: x['Seasons'], list) or []:
 223                 if season.get('SeasonNumber') in season_numbers and season.get('Title'):
 224                     season_titles.append(season['Title'])
 225
 226         def gen_episode(m_info, season_titles):
 227             for episode_group in try_get(m_info, lambda x: x['EpisodeGroups'], list) or []:
 228                 if season_titles and episode_group.get('Title') not in season_titles:
 229                     continue
 230                 episodes = try_get(episode_group, lambda x: x['Episodes'], list)
 231                 if not episodes:
 232                     continue
 233                 season_info = {
 234                     'season': episode_group.get('Title'),
 235                     'season_number': int_or_none(episode_group.get('SeasonNumber')),
 236                 }
 237                 try:
 238                     episodes = [(int(ep['EpisodeNumber']), ep) for ep in episodes]
 239                     episodes.sort()
 240                 except (KeyError, ValueError):
 241                     episodes = enumerate(episodes, 1)
 242                 for n, episode in episodes:
 243                     info = self._extract_episode(episode)
 244                     if info is None:
 245                         continue
 246                     info['episode_number'] = n
 247                     info.update(season_info)
 248                     yield info
 249
 250         return self.playlist_result(
 251             gen_episode(media_info, season_titles), playlist_id=video_id, **series_info)
 252
 253     def _real_extract(self, url):
 254         video_id = self._match_id(url)
 255         if video_id.startswith('ser.'):
 256             param_season = parse_qs(url).get('season', [None])
 257             param_season = [
 258                 (have_number, int_or_none(v) if have_number else str_or_none(v))
 259                 for have_number, v in
 260                 [(int_or_none(ps) is not None, ps) for ps in param_season]
 261                 if v is not None
 262             ]
 263             season_kwargs = {
 264                 k: [v for is_num, v in param_season if is_num is c] or None
 265                 for k, c in
 266                 [('season_titles', False), ('season_numbers', True)]
 267             }
 268             return self._extract_series(video_id, **season_kwargs)
 269
 270         return self._extract_episode(self._call_api_get_tiles(video_id))
 271
 272
 273 class ERTWebtvEmbedIE(InfoExtractor):
 274     IE_NAME = 'ertwebtv:embed'
 275     IE_DESC = 'ert.gr webtv embedded videos'
 276     _BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
 277     _VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'
 278
 279     _TESTS = [{
 280         'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
 281         'md5': 'f9e9900c25c26f4ecfbddbb4b6305854',
 282         'info_dict': {
 283             'id': 'trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4',
 284             'title': 'md5:914f06a73cd8b62fbcd6fb90c636e497',
 285             'ext': 'mp4',
 286             'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg'
 287         },
 288     }]
 289
 290     @classmethod
 291     def _extract_urls(cls, webpage):
 292         EMBED_URL_RE = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
 293         EMBED_RE = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{EMBED_URL_RE})(?P=_q1)'
 294
 295         for mobj in re.finditer(EMBED_RE, webpage):
 296             url = unescapeHTML(mobj.group('url'))
 297             if not cls.suitable(url):
 298                 continue
 299             yield url
 300
 301     def _real_extract(self, url):
 302         video_id = self._match_id(url)
 303         formats, subs = self._extract_m3u8_formats_and_subtitles(
 304             f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8',
 305             video_id, 'mp4')
 306         self._sort_formats(formats)
 307         thumbnail_id = parse_qs(url).get('bgimg', [None])[0]
 308         if thumbnail_id and not thumbnail_id.startswith('http'):
 309             thumbnail_id = f'https://program.ert.gr{thumbnail_id}'
 310         return {
 311             'id': video_id,
 312             'title': f'VOD - {video_id}',
 313             'thumbnail': thumbnail_id,
 314             'formats': formats,
 315             'subtitles': subs,
 316         }