]>
Commit | Line | Data |
---|---|---|
9b30cd3d E |
1 | import re |
2 | ||
e897bd82 | 3 | from .common import InfoExtractor |
9b30cd3d | 4 | from ..utils import ( |
e897bd82 | 5 | ExtractorError, |
9b30cd3d E |
6 | clean_html, |
7 | determine_ext, | |
9b30cd3d | 8 | float_or_none, |
e897bd82 | 9 | int_or_none, |
9b30cd3d E |
10 | js_to_json, |
11 | mimetype2ext, | |
12 | traverse_obj, | |
9b30cd3d | 13 | url_or_none, |
e897bd82 | 14 | urljoin, |
9b30cd3d E |
15 | ) |
16 | ||
17 | ||
class RTVCPlayBaseIE(InfoExtractor):
    # Shared pieces for the RTVCPlay extractors: the common URL prefix, the
    # inline player-config lookup and the conversion of its sources to formats.
    _BASE_VALID_URL = r'https?://(?:www\.)?rtvcplay\.co'

    def _extract_player_config(self, webpage, video_id):
        """Return the JSON value assigned to the `config` variable of an inline <script>."""
        # Remove every `" + "` sequence so concatenated string literals
        # become single literals before the JSON search runs
        joined_webpage = re.sub(r'"\s*\+\s*"', '', webpage)
        return self._search_json(
            r'<script\b[^>]*>[^<]*(?:var|let|const)\s+config\s*=', joined_webpage,
            'player_config', video_id, transform_source=js_to_json)

    def _extract_formats_and_subtitles_player_config(self, player_config, video_id):
        """Build a `(formats, subtitles)` pair from the `sources` entries of `player_config`."""
        formats = []
        subtitles = {}
        # Only entries whose 'url' passes url_or_none are considered
        usable_sources = traverse_obj(
            player_config, ('sources', ..., lambda _, v: url_or_none(v['url'])))

        for source in usable_sources:
            source_url = source['url']
            # The declared mimetype takes precedence; the URL's extension is the fallback
            ext = mimetype2ext(source.get('mimetype'), default=determine_ext(source_url))
            if ext != 'm3u8':
                formats.append({'url': source_url, 'ext': ext})
                continue

            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                source_url, video_id, 'mp4', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return formats, subtitles
42 | ||
43 | ||
class RTVCPlayIE(RTVCPlayBaseIE):
    # Handles every rtvcplay.co page except /embed/: single videos, live
    # channels (category 'en-vivo'), season-based programs and podcast series.
    _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/(?P<category>(?!embed)[^/]+)/(?:[^?#]+/)?(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.rtvcplay.co/en-vivo/canal-institucional',
        'info_dict': {
            'id': 'canal-institucional',
            'title': r're:^Canal Institucional',
            'description': 'md5:eff9e548394175928059320c006031ea',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/en-vivo/senal-colombia',
        'info_dict': {
            'id': 'senal-colombia',
            'title': r're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/en-vivo/radio-nacional',
        'info_dict': {
            'id': 'radio-nacional',
            'title': r're:^Radio Nacional',
            'description': 'md5:5de009bc6a9fa79d2a6cf0b73f977d53',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/peliculas-ficcion/senoritas',
        'md5': '1288ee6f6d1330d880f98bff2ed710a3',
        'info_dict': {
            'id': 'senoritas',
            'title': 'Señoritas',
            'description': 'md5:f095a2bb52cb6cf279daf6302f86fb32',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa/james-regresa-clases-28022022',
        'md5': 'f040a7380a269ad633cf837384d5e9fc',
        'info_dict': {
            'id': 'james-regresa-clases-28022022',
            'title': 'James regresa a clases - 28/02/2022',
            'description': 'md5:c5dcdf757c7ab29305e8763c6007e675',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.rtvcplay.co/peliculas-documentales/llinas-el-cerebro-y-el-universo',
        'info_dict': {
            'id': 'llinas-el-cerebro-y-el-universo',
            'title': 'Llinás, el cerebro y el universo',
            'description': 'md5:add875bf2309bb52b3e8b9b06116d9b0',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa',
        'info_dict': {
            'id': 'profe-en-tu-casa',
            'title': 'Profe en tu casa',
            'description': 'md5:47dbe20e263194413b1db2a2805a4f2e',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 537,
    }, {
        'url': 'https://www.rtvcplay.co/series-al-oido/relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
        'info_dict': {
            'id': 'relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
            'title': 'Relato de un náufrago: una travesía del periodismo a la literatura',
            'description': 'md5:6da28fdca4a5a568ea47ef65ef775603',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www.rtvcplay.co/series-al-oido/diez-versiones',
        'info_dict': {
            'id': 'diez-versiones',
            'title': 'Diez versiones',
            'description': 'md5:997471ed971cb3fd8e41969457675306',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 20,
    }]

    def _real_extract(self, url):
        """Return a single video/live result, or a playlist for program and podcast pages."""
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        category = mobj.group('category')
        webpage = self._download_webpage(url, video_id)

        state = self._search_json(
            r'window\.__RTVCPLAY_STATE__\s*=', webpage, 'hydration',
            video_id, transform_source=js_to_json)
        hydration = state['content']['currentContent']

        # An asset id is substituted into the HLS URL template; without one,
        # fall back to the HLS URL stored under 'channel'
        asset_id = traverse_obj(hydration, ('video', 'assetid'))
        hls_url = (
            hydration['base_url_hls'].replace('[node:field_asset_id]', asset_id)
            if asset_id else traverse_obj(hydration, ('channel', 'hls')))

        metadata = traverse_obj(hydration, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ((('channel', 'image', 'logo'), ('resource', 'image', 'cover_desktop')), 'path'),
        }, get_all=False)

        if hls_url:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id, 'mp4')
            return {
                'id': video_id,
                'formats': formats,
                'subtitles': subtitles,
                'is_live': category == 'en-vivo',
                **metadata,
            }

        # No stream URL: probably it's a program's page
        seasons = traverse_obj(
            hydration, ('widgets', lambda _, y: y['type'] == 'seasonList', 'contents'),
            get_all=False)

        if seasons:
            entries = []
            for season in seasons:
                for episode in traverse_obj(season, ('contents', ...)):
                    # Episode slugs are resolved against the page URL
                    episode_url = urljoin(url, episode['slug'])
                    season_info = traverse_obj(season, {
                        'season': 'title',
                        'season_number': ('season', {int_or_none}),
                    })
                    episode_info = traverse_obj(episode, {
                        'title': 'title',
                        'thumbnail': ('image', 'cover', 'path'),
                        'episode_number': ('chapter_number', {int_or_none}),
                    })
                    entries.append(self.url_result(
                        episode_url, url_transparent=True, **season_info, **episode_info))

            return self.playlist_result(entries, video_id, **metadata)

        # Neither a stream nor seasons: last option is a list of podcast episodes
        podcast_episodes = hydration.get('audios')
        if not podcast_episodes:
            raise ExtractorError('Could not find asset_id nor program playlist nor podcast episodes')

        podcast_entries = []
        for episode in podcast_episodes:
            file_url = episode['file']
            episode_info = traverse_obj(episode, {
                'title': 'title',
                'description': ('description', {clean_html}),
                'episode_number': ('chapter_number', {float_or_none}, {int_or_none}),
                'season_number': ('season', {int_or_none}),
            })
            podcast_entries.append(self.url_result(file_url, url_transparent=True, **episode_info))

        return self.playlist_result(podcast_entries, video_id, **metadata)
203 | ||
204 | ||
class RTVCPlayEmbedIE(RTVCPlayBaseIE):
    # Handles rtvcplay.co/embed/<id> pages, which carry an inline player config.
    _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/embed/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.rtvcplay.co/embed/72b0e699-248b-4929-a4a8-3782702fa7f9',
        'md5': 'ed529aeaee7aa2a72afe91ac7d1177a8',
        'info_dict': {
            'id': '72b0e699-248b-4929-a4a8-3782702fa7f9',
            'title': 'Tráiler: Señoritas',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'ext': 'mp4',
        }
    }]

    def _real_extract(self, url):
        """Extract formats from the embed page's player config and metadata from the CMS API."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_config = self._extract_player_config(webpage, video_id)
        formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id)

        # Metadata is requested only when the config names an asset id; the
        # request is non-fatal, so extraction continues if it fails
        metadata = {}
        asset_id = traverse_obj(player_config, ('rtvcplay', 'assetid'))
        if asset_id:
            metadata = self._download_json(
                f'https://cms.rtvcplay.co/api/v1/video/asset-id/{asset_id}', video_id, fatal=False)

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        info.update(traverse_obj(metadata, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ('image', ..., 'thumbnail', 'path'),
        }, get_all=False))
        return info
240 | ||
241 | ||
class RTVCKalturaIE(RTVCPlayBaseIE):
    # Handles media.rtvc.gov.co/kalturartvc/<id> pages; results are always marked live.
    _VALID_URL = r'https?://media\.rtvc\.gov\.co/kalturartvc/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://media.rtvc.gov.co/kalturartvc/indexSC.html',
        'info_dict': {
            'id': 'indexSC',
            'title': r're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }]

    def _real_extract(self, url):
        """Combine formats from the page's player config with the channel's HLS stream."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_config = self._extract_player_config(webpage, video_id)
        formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id)

        # Channel metadata is requested only when the config names a channel
        # id; the request is non-fatal
        metadata = {}
        channel_id = traverse_obj(player_config, ('rtvcplay', 'channelId'))
        if channel_id:
            metadata = self._download_json(
                f'https://cms.rtvcplay.co/api/v1/taxonomy_term/streaming/{channel_id}', video_id, fatal=False)

        # The channel's own HLS URL is extracted non-fatally and appended to
        # whatever the player config already yielded
        channel_hls_url = traverse_obj(metadata, ('channel', 'hls'))
        fmts, subs = self._extract_m3u8_formats_and_subtitles(
            channel_hls_url, video_id, 'mp4', fatal=False)
        formats.extend(fmts)
        self._merge_subtitles(subs, target=subtitles)

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }
        info.update(traverse_obj(metadata, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ('channel', 'image', 'logo', 'path'),
        }))
        return info
286 | } |