]>
Commit | Line | Data |
---|---|---|
2d49720f ZM |
1 | import json |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..compat import compat_str | |
6 | from ..utils import ( | |
7 | clean_html, | |
8 | determine_ext, | |
9 | ExtractorError, | |
10 | dict_get, | |
11 | int_or_none, | |
12 | merge_dicts, | |
13 | parse_qs, | |
14 | parse_age_limit, | |
15 | parse_iso8601, | |
16 | str_or_none, | |
17 | try_get, | |
18 | unescapeHTML, | |
19 | url_or_none, | |
20 | variadic, | |
21 | ) | |
22 | ||
23 | ||
class ERTFlixBaseIE(InfoExtractor):
    """Shared ERTFLIX API helpers used by the ERTFLIX extractors."""

    def _call_api(
            self, video_id, method='Player/AcquireContent', api_version=1,
            param_headers=None, data=None, headers=None, **params):
        """Call an ERTFLIX API method and return the decoded JSON on success.

        @param video_id       id passed through to _download_json
        @param method         API method path appended to the versioned base URL
        @param api_version    API version number used in the URL
        @param param_headers  extra entries for the JSON-encoded '$headers'
                              query parameter
        @param data           optional dict; when truthy it is merged with the
                              platform codename and sent as a JSON body
        @param headers        optional HTTP headers (never modified in place)
        @param params         further query parameters
        @returns              the response if its Result.Success is True,
                              otherwise None
        """
        platform_codename = {'platformCodename': 'www'}
        headers_as_param = {'X-Api-Date-Format': 'iso', 'X-Api-Camel-Case': False}
        headers_as_param.update(param_headers or {})
        # Work on a copy: Content-Type may be added below and the caller's
        # dict must not be altered as a side effect.
        headers = dict(headers or {})
        if data:
            headers['Content-Type'] = headers_as_param['Content-Type'] = 'application/json;charset=utf-8'
            data = json.dumps(merge_dicts(platform_codename, data)).encode('utf-8')
        # The platform codename goes in the body when there is one,
        # otherwise in the query string.
        query = merge_dicts(
            {} if data else platform_codename,
            {'$headers': json.dumps(headers_as_param)},
            params)
        response = self._download_json(
            'https://api.app.ertflix.gr/v%s/%s' % (str(api_version), method),
            video_id, fatal=False, query=query, data=data, headers=headers)
        if try_get(response, lambda x: x['Result']['Success']) is True:
            return response
        return None

    def _call_api_get_tiles(self, video_id, *tile_ids):
        """Fetch tile metadata through the 'Tile/GetTiles' API method.

        With no extra tile_ids, return the single tile whose Id equals
        video_id. With extra tile_ids, return the list of tiles, provided the
        returned Ids match the requested ones exactly.

        Raises ExtractorError if the expected tile(s) are not in the response.
        """
        requested_tile_ids = [video_id] + list(tile_ids)
        requested_tiles = [{'Id': tile_id} for tile_id in requested_tile_ids]
        tiles_response = self._call_api(
            video_id, method='Tile/GetTiles', api_version=2,
            data={'RequestedTiles': requested_tiles})
        # Ignore malformed entries (non-dicts or tiles without an Id) so they
        # lead to the ExtractorError below rather than a bare KeyError.
        tiles = [
            tile for tile in try_get(tiles_response, lambda x: x['Tiles'], list) or []
            if isinstance(tile, dict) and tile.get('Id') is not None]
        if tile_ids:
            if sorted(tile['Id'] for tile in tiles) != sorted(requested_tile_ids):
                raise ExtractorError('Requested tiles not found', video_id=video_id)
            return tiles
        matching_tile = next((tile for tile in tiles if tile['Id'] == video_id), None)
        if matching_tile is None:
            raise ExtractorError('No matching tile found', video_id=video_id)
        return matching_tile
60 | ||
61 | ||
class ERTFlixCodenameIE(ERTFlixBaseIE):
    """Extractor for the internal 'ertflix:<codename>' URL scheme."""

    IE_NAME = 'ertflix:codename'
    IE_DESC = 'ERTFLIX videos by codename'
    _VALID_URL = r'ertflix:(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'ertflix:monogramma-praxitelis-tzanoylinos',
        'md5': '5b9c2cd171f09126167e4082fc1dd0ef',
        'info_dict': {
            'id': 'monogramma-praxitelis-tzanoylinos',
            'ext': 'mp4',
            'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e',
        },
    },
    ]

    def _extract_formats_and_subs(self, video_id, allow_none=True):
        """Collect formats and subtitles for the given codename.

        Walks MediaFiles -> Formats in the API response. HLS and DASH
        manifests are expanded through the corresponding helpers; any other
        URL is added as a single direct format.

        Returns a (formats, subtitles) tuple. The formats are sorted unless
        the list is empty and allow_none is true.
        """
        api_response = self._call_api(video_id, codename=video_id)
        all_formats, all_subs = [], {}
        media_files = try_get(api_response, lambda x: x['MediaFiles'], list) or []
        for media_file in media_files:
            for entry in try_get(media_file, lambda x: x['Formats'], list) or []:
                stream_url = url_or_none(try_get(entry, lambda x: x['Url']))
                if not stream_url:
                    continue
                stream_ext = determine_ext(stream_url)
                if stream_ext not in ('m3u8', 'mpd'):
                    # Neither HLS nor DASH: treat as a plain direct URL
                    all_formats.append({
                        'url': stream_url,
                        'format_id': str_or_none(entry.get('Id')),
                    })
                    continue
                if stream_ext == 'm3u8':
                    new_formats, new_subs = self._extract_m3u8_formats_and_subtitles(
                        stream_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
                else:
                    new_formats, new_subs = self._extract_mpd_formats_and_subtitles(
                        stream_url, video_id, mpd_id='dash', fatal=False)
                all_formats.extend(new_formats)
                self._merge_subtitles(new_subs, target=all_subs)

        if all_formats or not allow_none:
            self._sort_formats(all_formats)
        return all_formats, all_subs

    def _real_extract(self, url):
        """Return an info dict for the codename, or None if no formats were found."""
        video_id = self._match_id(url)

        formats, subs = self._extract_formats_and_subs(video_id)
        if not formats:
            return None

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subs,
            'title': self._generic_title(url),
        }
117 | ||
118 | ||
class ERTFlixIE(ERTFlixBaseIE):
    """Extractor for ERTFLIX web pages: single videos (vod.*) and series (ser.*)."""

    IE_NAME = 'ertflix'
    IE_DESC = 'ERTFLIX videos'
    _VALID_URL = r'https?://www\.ertflix\.gr/(?:[^/]+/)?(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
    _TESTS = [{
        'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
        'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
        'info_dict': {
            'id': 'aoratoi-ergates',
            'ext': 'mp4',
            'title': 'md5:c1433d598fbba0211b0069021517f8b4',
            'description': 'md5:01a64d113c31957eb7eb07719ab18ff4',
            'thumbnail': r're:https?://.+\.jpg',
            'episode_id': 'vod.173258',
            'timestamp': 1639648800,
            'upload_date': '20211216',
            'duration': 3166,
            'age_limit': 8,
        },
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 64,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9',
        'info_dict': {
            'id': 'ser.164991',
            'age_limit': 8,
            'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.',
            'title': 'Το δίκτυο',
        },
        'playlist_mincount': 9,
    }, {
        'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari',
        'only_matching': True,
    }]

    def _extract_episode(self, episode):
        """Build a url_transparent result for one episode/tile dict.

        Returns None when the episode has no string Codename, no Title, or
        its HasPlayableStream flag is present and falsy. The result points at
        'ertflix:<codename>', which ERTFlixCodenameIE's _VALID_URL matches.
        """
        codename = try_get(episode, lambda x: x['Codename'], compat_str)
        title = episode.get('Title')
        description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription', )))
        if not codename or not title or not episode.get('HasPlayableStream', True):
            return
        # 'Images'/'Image' may hold one dict or several; use the first one
        # flagged IsMain, if any.
        thumbnail = next((
            url_or_none(thumb.get('Url'))
            for thumb in variadic(dict_get(episode, ('Images', 'Image')) or {})
            if thumb.get('IsMain')),
            None)
        return {
            '_type': 'url_transparent',
            'thumbnail': thumbnail,
            'id': codename,
            'episode_id': episode.get('Id'),
            'title': title,
            'alt_title': episode.get('Subtitle'),
            'description': description,
            'timestamp': parse_iso8601(episode.get('PublishDate')),
            'duration': episode.get('DurationSeconds'),
            'age_limit': self._parse_age_rating(episode),
            'url': 'ertflix:%s' % (codename, ),
        }

    @staticmethod
    def _parse_age_rating(info_dict):
        """Derive an age limit from AgeRating, else IsAdultContent (18), else IsKidsContent (0)."""
        return parse_age_limit(
            info_dict.get('AgeRating')
            or (info_dict.get('IsAdultContent') and 18)
            or (info_dict.get('IsKidsContent') and 0))

    def _extract_series(self, video_id, season_titles=None, season_numbers=None):
        """Return a playlist result for a series, optionally limited to some seasons.

        @param video_id        series id ('ser.NNN')
        @param season_titles   optional list of season (episode group) titles
                               to include
        @param season_numbers  optional list of season numbers; these are
                               mapped to titles through the series' Seasons list
        With no resulting titles, every episode group is included.
        """
        media_info = self._call_api(video_id, method='Tile/GetSeriesDetails', id=video_id)

        series = try_get(media_info, lambda x: x['Series'], dict) or {}
        series_info = {
            'age_limit': self._parse_age_rating(series),
            'title': series.get('Title'),
            'description': dict_get(series, ('ShortDescription', 'TinyDescription', )),
        }
        if season_numbers:
            # Copy before appending so the caller's list is left untouched
            season_titles = list(season_titles or [])
            for season in try_get(series, lambda x: x['Seasons'], list) or []:
                if season.get('SeasonNumber') in season_numbers and season.get('Title'):
                    season_titles.append(season['Title'])

        def gen_episode(m_info, season_titles):
            for episode_group in try_get(m_info, lambda x: x['EpisodeGroups'], list) or []:
                if season_titles and episode_group.get('Title') not in season_titles:
                    continue
                episodes = try_get(episode_group, lambda x: x['Episodes'], list)
                if not episodes:
                    continue
                season_info = {
                    'season': episode_group.get('Title'),
                    'season_number': int_or_none(episode_group.get('SeasonNumber')),
                }
                try:
                    # Sort on the number only: comparing whole (number, dict)
                    # tuples raises TypeError when two episodes share a
                    # number, because dicts cannot be ordered.
                    numbered_episodes = sorted(
                        ((int(ep['EpisodeNumber']), ep) for ep in episodes),
                        key=lambda pair: pair[0])
                except (KeyError, TypeError, ValueError):
                    # Missing, null or non-numeric EpisodeNumber: fall back
                    # to numbering by position in the list.
                    numbered_episodes = enumerate(episodes, 1)
                for n, episode in numbered_episodes:
                    info = self._extract_episode(episode)
                    if info is None:
                        continue
                    info['episode_number'] = n
                    info.update(season_info)
                    yield info

        return self.playlist_result(
            gen_episode(media_info, season_titles), playlist_id=video_id, **series_info)

    def _real_extract(self, url):
        """Dispatch to series extraction for 'ser.*' ids, else extract a single tile."""
        video_id = self._match_id(url)
        if video_id.startswith('ser.'):
            # Each 'season' query value is either a season number (integer)
            # or a season title (anything else).
            season_titles, season_numbers = [], []
            for value in parse_qs(url).get('season', []):
                number = int_or_none(value)
                if number is None:
                    season_titles.append(str_or_none(value))
                else:
                    season_numbers.append(number)
            return self._extract_series(
                video_id,
                season_titles=season_titles or None,
                season_numbers=season_numbers or None)

        return self._extract_episode(self._call_api_get_tiles(video_id))
271 | ||
272 | ||
class ERTWebtvEmbedIE(InfoExtractor):
    """Extractor for the ert.gr webtv VOD player, as embedded through iframes."""

    IE_NAME = 'ertwebtv:embed'
    IE_DESC = 'ert.gr webtv embedded videos'
    _BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
    _VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'

    _TESTS = [{
        'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
        'md5': 'f9e9900c25c26f4ecfbddbb4b6305854',
        'info_dict': {
            'id': 'trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4',
            'title': 'md5:914f06a73cd8b62fbcd6fb90c636e497',
            'ext': 'mp4',
            'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
        },
    }]

    @classmethod
    def _extract_urls(cls, webpage):
        """Yield player URLs found in iframe src attributes of the webpage.

        Only URLs accepted by cls.suitable() are yielded, after HTML
        unescaping.
        """
        # The URL runs up to the quote character that opened the src attribute
        player_url_re = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
        iframe_re = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{player_url_re})(?P=_q1)'

        for match in re.finditer(iframe_re, webpage):
            candidate = unescapeHTML(match.group('url'))
            if cls.suitable(candidate):
                yield candidate

    def _real_extract(self, url):
        """Extract HLS formats for the file named by the 'f' query parameter."""
        video_id = self._match_id(url)
        playlist_url = f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8'
        formats, subs = self._extract_m3u8_formats_and_subtitles(
            playlist_url, video_id, 'mp4')
        self._sort_formats(formats)

        # 'bgimg' may be absolute or a path relative to program.ert.gr
        thumbnail = parse_qs(url).get('bgimg', [None])[0]
        if thumbnail and not thumbnail.startswith('http'):
            thumbnail = f'https://program.ert.gr{thumbnail}'

        return {
            'id': video_id,
            'title': f'VOD - {video_id}',
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': subs,
        }