]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/ertgr.py
[ertgr] Add new extractors (#2338)
[yt-dlp.git] / yt_dlp / extractor / ertgr.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import json
5 import re
6
7 from .common import InfoExtractor
8 from ..compat import compat_str
9 from ..utils import (
10 clean_html,
11 determine_ext,
12 ExtractorError,
13 dict_get,
14 int_or_none,
15 merge_dicts,
16 parse_qs,
17 parse_age_limit,
18 parse_iso8601,
19 str_or_none,
20 try_get,
21 unescapeHTML,
22 url_or_none,
23 variadic,
24 )
25
26
class ERTFlixBaseIE(InfoExtractor):
    """Common helpers for extractors that query the ERTFLIX JSON API."""

    def _call_api(
            self, video_id, method='Player/AcquireContent', api_version=1,
            param_headers=None, data=None, headers=None, **params):
        """Call an ERTFLIX API method and return the decoded JSON response.

        video_id      -- id passed through to _download_json
        method        -- API method path appended to the versioned base URL
        api_version   -- API version number used in the URL
        param_headers -- extra entries for the JSON-encoded '$headers' query parameter
        data          -- dict sent as JSON request body (merged with the platform codename)
        headers       -- real HTTP request headers; the passed dict is not modified
        params        -- additional query parameters

        Returns the response only when response['Result']['Success'] is True;
        otherwise None is returned. The download itself is non-fatal.
        """
        platform_codename = {'platformCodename': 'www'}
        headers_as_param = {'X-Api-Date-Format': 'iso', 'X-Api-Camel-Case': False}
        headers_as_param.update(param_headers or {})
        # Copy, so that the Content-Type set below never leaks into the caller's dict
        headers = dict(headers or {})
        if data:
            headers['Content-Type'] = headers_as_param['Content-Type'] = 'application/json;charset=utf-8'
            # With a request body the platform codename travels inside the body ...
            data = json.dumps(merge_dicts(platform_codename, data)).encode('utf-8')
        query = merge_dicts(
            # ... and without one it is sent as a query parameter instead
            {} if data else platform_codename,
            {'$headers': json.dumps(headers_as_param)},
            params)
        response = self._download_json(
            'https://api.app.ertflix.gr/v%s/%s' % (str(api_version), method),
            video_id, fatal=False, query=query, data=data, headers=headers)
        if try_get(response, lambda x: x['Result']['Success']) is True:
            return response

    def _call_api_get_tiles(self, video_id, *tile_ids):
        """Fetch tile metadata through the 'Tile/GetTiles' API method.

        Without extra tile_ids, the single tile whose 'Id' equals video_id is
        returned. With extra tile_ids, the list of all returned tiles is
        returned, provided their ids match the requested ids exactly.

        Raises ExtractorError when the expected tile(s) are not in the response.
        """
        requested_tile_ids = [video_id] + list(tile_ids)
        requested_tiles = [{'Id': tile_id} for tile_id in requested_tile_ids]
        tiles_response = self._call_api(
            video_id, method='Tile/GetTiles', api_version=2,
            data={'RequestedTiles': requested_tiles})
        tiles = try_get(tiles_response, lambda x: x['Tiles'], list) or []

        def tile_id_of(tile):
            # Malformed tiles (no 'Id', or not a dict) yield None instead of raising
            return try_get(tile, lambda x: x['Id'])

        if tile_ids:
            found_tile_ids = [tile_id_of(tile) for tile in tiles]
            if None in found_tile_ids or sorted(found_tile_ids) != sorted(requested_tile_ids):
                raise ExtractorError('Requested tiles not found', video_id=video_id)
            return tiles

        matching_tile = next((tile for tile in tiles if tile_id_of(tile) == video_id), None)
        if matching_tile is None:
            raise ExtractorError('No matching tile found', video_id=video_id)
        return matching_tile
63
64
class ERTFlixCodenameIE(ERTFlixBaseIE):
    """Extractor for the internal 'ertflix:<codename>' pseudo URLs."""

    IE_NAME = 'ertflix:codename'
    IE_DESC = 'ERTFLIX videos by codename'
    _VALID_URL = r'ertflix:(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'ertflix:monogramma-praxitelis-tzanoylinos',
        'md5': '5b9c2cd171f09126167e4082fc1dd0ef',
        'info_dict': {
            'id': 'monogramma-praxitelis-tzanoylinos',
            'ext': 'mp4',
            'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e',
        },
    },
    ]

    def _extract_formats_and_subs(self, video_id, allow_none=True):
        """Return (formats, subtitles) for the given codename.

        Every entry of MediaFiles[].Formats[] with a valid 'Url' contributes
        either the expanded HLS/DASH manifest or one direct format.
        Formats are sorted when any were found, or unconditionally when
        allow_none is false.
        """
        api_response = self._call_api(video_id, codename=video_id)
        collected_formats, collected_subs = [], {}

        media_files = try_get(api_response, lambda x: x['MediaFiles'], list) or []
        for media_file in media_files:
            variants = try_get(media_file, lambda x: x['Formats'], list) or []
            for variant in variants:
                stream_url = url_or_none(try_get(variant, lambda x: x['Url']))
                if not stream_url:
                    continue

                stream_ext = determine_ext(stream_url)
                if stream_ext not in ('m3u8', 'mpd'):
                    # Anything that is not a manifest is taken as a direct link
                    collected_formats.append({
                        'url': stream_url,
                        'format_id': str_or_none(variant.get('Id')),
                    })
                    continue

                if stream_ext == 'm3u8':
                    manifest_formats, manifest_subs = self._extract_m3u8_formats_and_subtitles(
                        stream_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
                else:
                    manifest_formats, manifest_subs = self._extract_mpd_formats_and_subtitles(
                        stream_url, video_id, mpd_id='dash', fatal=False)
                collected_formats.extend(manifest_formats)
                self._merge_subtitles(manifest_subs, target=collected_subs)

        must_sort = bool(collected_formats) or not allow_none
        if must_sort:
            self._sort_formats(collected_formats)
        return collected_formats, collected_subs

    def _real_extract(self, url):
        """Build the info dict for a codename; returns None when no formats were found."""
        video_id = self._match_id(url)

        formats, subs = self._extract_formats_and_subs(video_id)
        if not formats:
            return None

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subs,
            'title': self._generic_title(url),
        }
120
121
class ERTFlixIE(ERTFlixBaseIE):
    """Extractor for ertflix.gr single videos ('vod.*' ids) and series ('ser.*' ids)."""

    IE_NAME = 'ertflix'
    IE_DESC = 'ERTFLIX videos'
    _VALID_URL = r'https?://www\.ertflix\.gr/(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
    _TESTS = [{
        'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
        'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
        'info_dict': {
            'id': 'aoratoi-ergates',
            'ext': 'mp4',
            'title': 'md5:c1433d598fbba0211b0069021517f8b4',
            'description': 'md5:01a64d113c31957eb7eb07719ab18ff4',
            'thumbnail': r're:https?://.+\.jpg',
            'episode_id': 'vod.173258',
            'timestamp': 1639648800,
            'upload_date': '20211216',
            'duration': 3166,
            'age_limit': 8,
        },
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 64,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9',
        'info_dict': {
            'id': 'ser.164991',
            'age_limit': 8,
            'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.',
            'title': 'Το δίκτυο',
        },
        'playlist_mincount': 9,
    }]

    def _extract_episode(self, episode):
        """Turn one episode/tile dict from the API into a url_transparent result.

        The result points at the 'ertflix:<codename>' pseudo URL. Returns None
        when the episode has no string 'Codename', no 'Title', or is explicitly
        flagged with a false 'HasPlayableStream'.
        """
        codename = try_get(episode, lambda x: x['Codename'], compat_str)
        title = episode.get('Title')
        description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription', )))
        if not codename or not title or not episode.get('HasPlayableStream', True):
            return
        # 'Images'/'Image' may hold one image or several; use the first one flagged as main
        thumbnail = next((
            url_or_none(thumb.get('Url'))
            for thumb in variadic(dict_get(episode, ('Images', 'Image')) or {})
            if thumb.get('IsMain')),
            None)
        return {
            '_type': 'url_transparent',
            'thumbnail': thumbnail,
            'id': codename,
            'episode_id': episode.get('Id'),
            'title': title,
            'alt_title': episode.get('Subtitle'),
            'description': description,
            'timestamp': parse_iso8601(episode.get('PublishDate')),
            'duration': episode.get('DurationSeconds'),
            'age_limit': self._parse_age_rating(episode),
            'url': 'ertflix:%s' % (codename, ),
        }

    @staticmethod
    def _parse_age_rating(info_dict):
        """Derive the age limit from 'AgeRating', else from the adult (18) / kids (0) flags."""
        return parse_age_limit(
            info_dict.get('AgeRating')
            or (info_dict.get('IsAdultContent') and 18)
            or (info_dict.get('IsKidsContent') and 0))

    def _extract_series(self, video_id, season_titles=None, season_numbers=None):
        """Build a playlist of the episodes of a series.

        season_titles  -- optional list of season titles to restrict the playlist to
        season_numbers -- optional list of season numbers; these are translated to
                          titles through the series' 'Seasons' list

        NOTE: when none of the requested season numbers resolves to a title (and no
        titles were given), the title filter ends up empty and all episodes are yielded.
        """
        media_info = self._call_api(video_id, method='Tile/GetSeriesDetails', id=video_id)

        series = try_get(media_info, lambda x: x['Series'], dict) or {}
        series_info = {
            'age_limit': self._parse_age_rating(series),
            'title': series.get('Title'),
            'description': dict_get(series, ('ShortDescription', 'TinyDescription', )),
        }
        if season_numbers:
            # Copy before appending, so the list passed in by the caller is left untouched
            season_titles = list(season_titles or [])
            for season in try_get(series, lambda x: x['Seasons'], list) or []:
                if season.get('SeasonNumber') in season_numbers and season.get('Title'):
                    season_titles.append(season['Title'])

        def gen_episode(m_info, season_titles):
            for episode_group in try_get(m_info, lambda x: x['EpisodeGroups'], list) or []:
                if season_titles and episode_group.get('Title') not in season_titles:
                    continue
                episodes = try_get(episode_group, lambda x: x['Episodes'], list)
                if not episodes:
                    continue
                season_info = {
                    'season': episode_group.get('Title'),
                    'season_number': int_or_none(episode_group.get('SeasonNumber')),
                }
                try:
                    # Sort on the number only: a plain tuple sort would compare the
                    # episode dicts whenever two numbers are equal, which raises
                    # TypeError. sorted() is stable, so ties keep the API order.
                    episodes = sorted(
                        ((int(ep['EpisodeNumber']), ep) for ep in episodes),
                        key=lambda numbered: numbered[0])
                except (KeyError, TypeError, ValueError):
                    # Missing, null or non-numeric episode numbers: number by position.
                    # 'episodes' is still the original list here, because the
                    # assignment above only happens once sorted() has succeeded.
                    episodes = enumerate(episodes, 1)
                for n, episode in episodes:
                    info = self._extract_episode(episode)
                    if info is None:
                        continue
                    info['episode_number'] = n
                    info.update(season_info)
                    yield info

        return self.playlist_result(
            gen_episode(media_info, season_titles), playlist_id=video_id, **series_info)

    def _real_extract(self, url):
        """Dispatch to series extraction for 'ser.*' ids, otherwise to a single tile."""
        video_id = self._match_id(url)
        if not video_id.startswith('ser.'):
            return self._extract_episode(self._call_api_get_tiles(video_id))

        # Each 'season' query value is either a season number or a season title
        season_titles, season_numbers = [], []
        for raw_value in parse_qs(url).get('season', []):
            number = int_or_none(raw_value)
            if number is not None:
                season_numbers.append(number)
                continue
            title = str_or_none(raw_value)
            if title is not None:
                season_titles.append(title)

        return self._extract_series(
            video_id,
            season_titles=season_titles or None,
            season_numbers=season_numbers or None)
271
272
class ERTWebtvEmbedIE(InfoExtractor):
    """Extractor for the ert.gr webtv VOD player page and iframes embedding it."""

    IE_NAME = 'ertwebtv:embed'
    IE_DESC = 'ert.gr webtv embedded videos'
    _BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
    _VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'

    _TESTS = [{
        'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
        'md5': 'f9e9900c25c26f4ecfbddbb4b6305854',
        'info_dict': {
            'id': 'trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4',
            'title': 'md5:914f06a73cd8b62fbcd6fb90c636e497',
            'ext': 'mp4',
            'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg'
        },
    }]

    @classmethod
    def _extract_urls(cls, webpage):
        """Yield the player URLs of all matching <iframe src=...> tags in webpage."""
        # The URL part runs up to the quote character that opened the src attribute
        player_url_re = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
        iframe_re = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{player_url_re})(?P=_q1)'

        for match in re.finditer(iframe_re, webpage):
            candidate = unescapeHTML(match.group('url'))
            # Only hand out URLs this extractor can actually handle
            if cls.suitable(candidate):
                yield candidate

    def _real_extract(self, url):
        """Extract the HLS formats for the file named by the 'f' query parameter."""
        video_id = self._match_id(url)
        manifest_url = f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8'
        formats, subs = self._extract_m3u8_formats_and_subtitles(
            manifest_url, video_id, 'mp4')
        self._sort_formats(formats)

        # 'bgimg' may be absolute or a path below program.ert.gr
        background_images = parse_qs(url).get('bgimg', [None])
        thumbnail = background_images[0]
        if thumbnail and not thumbnail.startswith('http'):
            thumbnail = f'https://program.ert.gr{thumbnail}'

        return {
            'id': video_id,
            'title': f'VOD - {video_id}',
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': subs,
        }