]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/ertgr.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / ertgr.py
CommitLineData
2d49720f
ZM
1import json
2import re
3
4from .common import InfoExtractor
2d49720f 5from ..utils import (
e897bd82 6 ExtractorError,
2d49720f
ZM
7 clean_html,
8 determine_ext,
2d49720f
ZM
9 dict_get,
10 int_or_none,
11 merge_dicts,
2d49720f
ZM
12 parse_age_limit,
13 parse_iso8601,
e897bd82 14 parse_qs,
2d49720f
ZM
15 str_or_none,
16 try_get,
2d49720f
ZM
17 url_or_none,
18 variadic,
19)
20
21
# Shared helpers for the ERTFLIX extractors: a thin wrapper around the
# api.app.ertflix.gr JSON API plus a tile lookup built on top of it.
class ERTFlixBaseIE(InfoExtractor):
    def _call_api(
            self, video_id, method='Player/AcquireContent', api_version=1,
            param_headers=None, data=None, headers=None, **params):
        """Call an ERTFLIX API method and return the decoded JSON response.

        video_id is passed to the downloader and used for reporting.
        method and api_version select the endpoint
        (https://api.app.ertflix.gr/v<api_version>/<method>).
        param_headers are merged into the JSON blob sent as the `$headers`
        query parameter; headers are sent as real HTTP headers.
        data, when truthy, is merged with the platform codename and sent as a
        JSON body; otherwise the platform codename goes into the query string.
        Any additional keyword arguments are added to the query string.

        Returns the response only when its Result.Success field is exactly
        True, otherwise None. The download itself is non-fatal (fatal=False).
        """
        platform_codename = {'platformCodename': 'www'}
        headers_as_param = {'X-Api-Date-Format': 'iso', 'X-Api-Camel-Case': False}
        headers_as_param.update(param_headers or {})
        # Copy before modifying: Content-Type is added below and must not
        # leak into a dict that belongs to the caller.
        headers = dict(headers or {})
        if data:
            headers['Content-Type'] = headers_as_param['Content-Type'] = 'application/json;charset=utf-8'
            data = json.dumps(merge_dicts(platform_codename, data)).encode()
        query = merge_dicts(
            {} if data else platform_codename,
            {'$headers': json.dumps(headers_as_param)},
            params)
        response = self._download_json(
            f'https://api.app.ertflix.gr/v{api_version!s}/{method}',
            video_id, fatal=False, query=query, data=data, headers=headers)
        if try_get(response, lambda x: x['Result']['Success']) is True:
            return response
        return None

    def _call_api_get_tiles(self, video_id, *tile_ids):
        """Fetch tile metadata through the Tile/GetTiles method (API v2).

        With only video_id, return the single tile whose Id equals video_id.
        With extra tile_ids, return the list of tiles, provided the returned
        Ids are exactly the requested ones (order is ignored).

        Raises ExtractorError when the expected tile(s) are not present.
        """
        requested_tile_ids = [video_id, *tile_ids]
        requested_tiles = [{'Id': tile_id} for tile_id in requested_tile_ids]
        tiles_response = self._call_api(
            video_id, method='Tile/GetTiles', api_version=2,
            data={'RequestedTiles': requested_tiles})
        tiles = try_get(tiles_response, lambda x: x['Tiles'], list) or []
        if tile_ids:
            # A tile without a usable string Id yields None here and is
            # treated as a mismatch instead of raising KeyError/TypeError.
            returned_ids = [try_get(tile, lambda x: x['Id'], str) for tile in tiles]
            if None in returned_ids or sorted(returned_ids) != sorted(requested_tile_ids):
                raise ExtractorError('Requested tiles not found', video_id=video_id)
            return tiles
        matching_tile = next(
            (tile for tile in tiles if try_get(tile, lambda x: x['Id']) == video_id),
            None)
        if matching_tile is None:
            raise ExtractorError('No matching tile found', video_id=video_id)
        return matching_tile
58
59
# Handles the internal `ertflix:<codename>` URLs (the ERTFlixIE extractor in
# this file emits such URLs for individual episodes).
class ERTFlixCodenameIE(ERTFlixBaseIE):
    IE_NAME = 'ertflix:codename'
    IE_DESC = 'ERTFLIX videos by codename'
    _VALID_URL = r'ertflix:(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'ertflix:monogramma-praxitelis-tzanoylinos',
        'md5': '5b9c2cd171f09126167e4082fc1dd0ef',
        'info_dict': {
            'id': 'monogramma-praxitelis-tzanoylinos',
            'ext': 'mp4',
            'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e',
        },
    },
    ]

    def _extract_formats_and_subs(self, video_id):
        """Collect formats and subtitles for the given codename.

        Walks MediaFiles -> Formats in the API response. HLS (.m3u8) and DASH
        (.mpd) URLs are expanded with the corresponding manifest helpers
        (non-fatal); any other valid URL becomes a single direct format.

        Returns a (formats, subtitles) tuple.
        """
        content = self._call_api(video_id, codename=video_id)
        all_formats = []
        all_subtitles = {}

        media_files = try_get(content, lambda x: x['MediaFiles'], list) or []
        for media_file in media_files:
            variants = try_get(media_file, lambda x: x['Formats'], list) or []
            for variant in variants:
                stream_url = url_or_none(try_get(variant, lambda x: x['Url']))
                if not stream_url:
                    continue

                manifest_type = determine_ext(stream_url)
                if manifest_type not in ('m3u8', 'mpd'):
                    # Not a manifest: expose the URL as-is.
                    all_formats.append({
                        'url': stream_url,
                        'format_id': str_or_none(variant.get('Id')),
                    })
                    continue

                if manifest_type == 'm3u8':
                    found_formats, found_subs = self._extract_m3u8_formats_and_subtitles(
                        stream_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
                else:
                    found_formats, found_subs = self._extract_mpd_formats_and_subtitles(
                        stream_url, video_id, mpd_id='dash', fatal=False)
                all_formats.extend(found_formats)
                self._merge_subtitles(found_subs, target=all_subtitles)

        return all_formats, all_subtitles

    def _real_extract(self, url):
        """Build the info dict for an `ertflix:<codename>` URL.

        Returns None when no formats could be extracted.
        """
        video_id = self._match_id(url)
        formats, subs = self._extract_formats_and_subs(video_id)

        if not formats:
            return None
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subs,
            'title': self._generic_title(url),
        }
113
114
# Extractor for www.ertflix.gr pages: single videos ("vod.<n>" ids) and
# series ("ser.<n>" ids), the latter returned as playlists.
class ERTFlixIE(ERTFlixBaseIE):
    IE_NAME = 'ertflix'
    IE_DESC = 'ERTFLIX videos'
    _VALID_URL = r'https?://www\.ertflix\.gr/(?:[^/]+/)?(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
    _TESTS = [{
        'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
        'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
        'info_dict': {
            'id': 'aoratoi-ergates',
            'ext': 'mp4',
            'title': 'md5:c1433d598fbba0211b0069021517f8b4',
            'description': 'md5:01a64d113c31957eb7eb07719ab18ff4',
            'thumbnail': r're:https?://.+\.jpg',
            'episode_id': 'vod.173258',
            'timestamp': 1639648800,
            'upload_date': '20211216',
            'duration': 3166,
            'age_limit': 8,
        },
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 64,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9',
        'info_dict': {
            'id': 'ser.164991',
            'age_limit': 8,
            'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.',
            'title': 'Το δίκτυο',
        },
        'playlist_mincount': 9,
    }, {
        'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari',
        'only_matching': True,
    }]

    def _extract_episode(self, episode):
        """Turn one episode/tile dict from the API into a url_transparent result.

        Returns None when the episode has no string Codename, no Title, or
        explicitly reports HasPlayableStream as falsy. Otherwise the result
        points at `ertflix:<codename>` and carries the metadata found here.
        """
        codename = try_get(episode, lambda x: x['Codename'], str)
        title = episode.get('Title')
        description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription')))
        if not codename or not title or not episode.get('HasPlayableStream', True):
            return
        # Use the URL of the first image flagged IsMain; `Images`/`Image` may
        # hold a single item or a sequence, hence variadic().
        thumbnail = next((
            url_or_none(thumb.get('Url'))
            for thumb in variadic(dict_get(episode, ('Images', 'Image')) or {})
            if thumb.get('IsMain')),
            None)
        return {
            '_type': 'url_transparent',
            'thumbnail': thumbnail,
            'id': codename,
            'episode_id': episode.get('Id'),
            'title': title,
            'alt_title': episode.get('Subtitle'),
            'description': description,
            'timestamp': parse_iso8601(episode.get('PublishDate')),
            'duration': episode.get('DurationSeconds'),
            'age_limit': self._parse_age_rating(episode),
            'url': f'ertflix:{codename}',
        }

    @staticmethod
    def _parse_age_rating(info_dict):
        """Derive an age limit from AgeRating, else IsAdultContent (18), else IsKidsContent (0)."""
        return parse_age_limit(
            info_dict.get('AgeRating')
            or (info_dict.get('IsAdultContent') and 18)
            or (info_dict.get('IsKidsContent') and 0))

    def _extract_series(self, video_id, season_titles=None, season_numbers=None):
        """Return a playlist of the episodes of a series.

        season_titles / season_numbers optionally restrict the playlist:
        requested season numbers are translated into season titles using the
        series' Seasons list, and only episode groups whose Title is among the
        resulting titles are included. With no filter, all groups are used.
        """
        media_info = self._call_api(video_id, method='Tile/GetSeriesDetails', id=video_id)

        series = try_get(media_info, lambda x: x['Series'], dict) or {}
        series_info = {
            'age_limit': self._parse_age_rating(series),
            'title': series.get('Title'),
            'description': dict_get(series, ('ShortDescription', 'TinyDescription')),
        }
        if season_numbers:
            # Copy so that the titles appended below do not modify a list
            # owned by the caller.
            season_titles = list(season_titles or [])
            for season in try_get(series, lambda x: x['Seasons'], list) or []:
                if season.get('SeasonNumber') in season_numbers and season.get('Title'):
                    season_titles.append(season['Title'])

        def gen_episode(m_info, season_titles):
            for episode_group in try_get(m_info, lambda x: x['EpisodeGroups'], list) or []:
                if season_titles and episode_group.get('Title') not in season_titles:
                    continue
                episodes = try_get(episode_group, lambda x: x['Episodes'], list)
                if not episodes:
                    continue
                season_info = {
                    'season': episode_group.get('Title'),
                    'season_number': int_or_none(episode_group.get('SeasonNumber')),
                }
                try:
                    numbered = [(int(ep['EpisodeNumber']), ep) for ep in episodes]
                except (KeyError, TypeError, ValueError):
                    # Missing, null or non-numeric EpisodeNumber somewhere:
                    # fall back to numbering by position in the API order.
                    numbered = list(enumerate(episodes, 1))
                else:
                    # Sort on the number only. Comparing whole tuples would
                    # compare the episode dicts on a tie and raise TypeError.
                    numbered.sort(key=lambda pair: pair[0])
                for n, episode in numbered:
                    info = self._extract_episode(episode)
                    if info is None:
                        continue
                    info['episode_number'] = n
                    info.update(season_info)
                    yield info

        return self.playlist_result(
            gen_episode(media_info, season_titles), playlist_id=video_id, **series_info)

    def _real_extract(self, url):
        """Dispatch on the id prefix: `ser.` ids are series, anything else a single tile."""
        video_id = self._match_id(url)
        if video_id.startswith('ser.'):
            # Each `season` query value is a season number when it parses as
            # an integer, and a season title otherwise.
            season_titles, season_numbers = [], []
            for value in parse_qs(url).get('season', [None]):
                if value is None:
                    continue
                number = int_or_none(value)
                if number is not None:
                    season_numbers.append(number)
                else:
                    season_titles.append(str_or_none(value))
            return self._extract_series(
                video_id,
                season_titles=season_titles or None,
                season_numbers=season_numbers or None)

        return self._extract_episode(self._call_api_get_tiles(video_id))
267
268
# Extractor for the ert.gr webtv VOD player page (dt-uni-vod.php), which is
# typically reached through an <iframe> embed matched by _EMBED_REGEX.
class ERTWebtvEmbedIE(InfoExtractor):
    IE_NAME = 'ertwebtv:embed'
    IE_DESC = 'ert.gr webtv embedded videos'
    _BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
    _VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'
    _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>(?:https?:)?{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']

    _TESTS = [{
        'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
        'md5': 'f9e9900c25c26f4ecfbddbb4b6305854',
        'info_dict': {
            'id': 'trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4',
            'title': 'md5:914f06a73cd8b62fbcd6fb90c636e497',
            'ext': 'mp4',
            'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
        },
    }]

    def _real_extract(self, url):
        """Extract the HLS formats for the file named by the `f` query parameter.

        The optional `bgimg` query parameter is used as the thumbnail; a value
        that does not start with "http" is prefixed with https://program.ert.gr.
        """
        video_id = self._match_id(url)

        playlist_url = f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8'
        formats, subs = self._extract_m3u8_formats_and_subtitles(
            playlist_url, video_id, 'mp4')

        background_image = parse_qs(url).get('bgimg', [None])[0]
        needs_host = bool(background_image) and not background_image.startswith('http')
        thumbnail = f'https://program.ert.gr{background_image}' if needs_host else background_image

        return {
            'id': video_id,
            'title': f'VOD - {video_id}',
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': subs,
        }