]>
Commit | Line | Data |
---|---|---|
9b30cd3d E |
1 | import re |
2 | ||
3 | from .common import InfoExtractor, ExtractorError | |
4 | from ..utils import ( | |
5 | clean_html, | |
6 | determine_ext, | |
7 | int_or_none, | |
8 | float_or_none, | |
9 | js_to_json, | |
10 | mimetype2ext, | |
11 | traverse_obj, | |
12 | urljoin, | |
13 | url_or_none, | |
14 | ) | |
15 | ||
16 | ||
class RTVCPlayBaseIE(InfoExtractor):
    """Common base for the RTVCPlay extractors defined in this module.

    Holds the shared URL prefix and the helpers that read the inline
    JavaScript ``config`` object of a player page.
    """

    _BASE_VALID_URL = r'https?://(?:www\.)?rtvcplay\.co'

    def _extract_player_config(self, webpage, video_id):
        """Return the ``config`` object assigned inside an inline ``<script>`` of *webpage*."""
        # Drop every `" + "` sequence so adjacent JavaScript string literals
        # are fused into one before the JSON search runs.
        fused_webpage = re.sub(r'"\s*\+\s*"', '', webpage)
        config_pattern = r'<script\b[^>]*>[^<]*(?:var|let|const)\s+config\s*='
        return self._search_json(
            config_pattern, fused_webpage, 'player_config', video_id,
            transform_source=js_to_json)

    def _extract_formats_and_subtitles_player_config(self, player_config, video_id):
        """Return ``(formats, subtitles)`` built from the ``sources`` of *player_config*.

        Only sources whose ``url`` passes ``url_or_none`` are considered.
        Sources resolving to the ``m3u8`` extension are expanded non-fatally
        through the m3u8 helper; every other source becomes one direct format.
        """
        formats = []
        subtitles = {}
        usable_sources = traverse_obj(
            player_config, ('sources', ..., lambda _, v: url_or_none(v['url'])))

        for src in usable_sources:
            src_url = src['url']
            # The declared mimetype wins; the URL's extension is the fallback.
            ext = mimetype2ext(src.get('mimetype'), default=determine_ext(src_url))
            if ext != 'm3u8':
                formats.append({
                    'url': src_url,
                    'ext': ext,
                })
                continue

            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                src_url, video_id, 'mp4', fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        return formats, subtitles
41 | ||
42 | ||
class RTVCPlayIE(RTVCPlayBaseIE):
    """Extractor for rtvcplay.co content pages (everything except ``/embed/``).

    Depending on the page's hydration state the result is a single video or
    live stream, a playlist of program episodes grouped by season, or a
    playlist of podcast episodes.
    """

    _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/(?P<category>(?!embed)[^/]+)/(?:[^?#]+/)?(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.rtvcplay.co/en-vivo/canal-institucional',
        'info_dict': {
            'id': 'canal-institucional',
            'title': r're:^Canal Institucional',
            'description': 'md5:eff9e548394175928059320c006031ea',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/en-vivo/senal-colombia',
        'info_dict': {
            'id': 'senal-colombia',
            'title': r're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/en-vivo/radio-nacional',
        'info_dict': {
            'id': 'radio-nacional',
            'title': r're:^Radio Nacional',
            'description': 'md5:5de009bc6a9fa79d2a6cf0b73f977d53',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/peliculas-ficcion/senoritas',
        'md5': '1288ee6f6d1330d880f98bff2ed710a3',
        'info_dict': {
            'id': 'senoritas',
            'title': 'Señoritas',
            'description': 'md5:f095a2bb52cb6cf279daf6302f86fb32',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa/james-regresa-clases-28022022',
        'md5': 'f040a7380a269ad633cf837384d5e9fc',
        'info_dict': {
            'id': 'james-regresa-clases-28022022',
            'title': 'James regresa a clases - 28/02/2022',
            'description': 'md5:c5dcdf757c7ab29305e8763c6007e675',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.rtvcplay.co/peliculas-documentales/llinas-el-cerebro-y-el-universo',
        'info_dict': {
            'id': 'llinas-el-cerebro-y-el-universo',
            'title': 'Llinás, el cerebro y el universo',
            'description': 'md5:add875bf2309bb52b3e8b9b06116d9b0',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa',
        'info_dict': {
            'id': 'profe-en-tu-casa',
            'title': 'Profe en tu casa',
            'description': 'md5:47dbe20e263194413b1db2a2805a4f2e',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 537,
    }, {
        'url': 'https://www.rtvcplay.co/series-al-oido/relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
        'info_dict': {
            'id': 'relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
            'title': 'Relato de un náufrago: una travesía del periodismo a la literatura',
            'description': 'md5:6da28fdca4a5a568ea47ef65ef775603',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www.rtvcplay.co/series-al-oido/diez-versiones',
        'info_dict': {
            'id': 'diez-versiones',
            'title': 'Diez versiones',
            'description': 'md5:997471ed971cb3fd8e41969457675306',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 20,
    }]

    def _real_extract(self, url):
        """Extract a video/live stream, a season playlist or a podcast playlist from *url*.

        Raises:
            ExtractorError: if the page exposes no stream, no season list and
                no podcast episode with a ``file``.
        """
        video_id, category = self._match_valid_url(url).group('id', 'category')
        webpage = self._download_webpage(url, video_id)

        hydration = self._search_json(
            r'window\.__RTVCPLAY_STATE__\s*=', webpage, 'hydration',
            video_id, transform_source=js_to_json)['content']['currentContent']

        # An asset id is substituted into the page's HLS URL template;
        # without one, fall back to the channel's own HLS URL.
        asset_id = traverse_obj(hydration, ('video', 'assetid'))
        if asset_id:
            hls_url = hydration['base_url_hls'].replace('[node:field_asset_id]', asset_id)
        else:
            hls_url = traverse_obj(hydration, ('channel', 'hls'))

        metadata = traverse_obj(hydration, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ((('channel', 'image', 'logo'), ('resource', 'image', 'cover_desktop')), 'path'),
        }, get_all=False)

        if hls_url:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id, 'mp4')
            return {
                'id': video_id,
                'formats': formats,
                'subtitles': subtitles,
                'is_live': category == 'en-vivo',
                **metadata,
            }

        # Probably it's a program's page
        seasons = traverse_obj(
            hydration, ('widgets', lambda _, y: y['type'] == 'seasonList', 'contents'),
            get_all=False)
        if seasons:
            # Episodes without a slug cannot be turned into a URL, so they are
            # filtered out instead of aborting the whole playlist on a KeyError.
            entries = [self.url_result(
                urljoin(url, episode['slug']), url_transparent=True,
                **traverse_obj(season, {
                    'season': 'title',
                    'season_number': ('season', {int_or_none}),
                }), **traverse_obj(episode, {
                    'title': 'title',
                    'thumbnail': ('image', 'cover', 'path'),
                    'episode_number': ('chapter_number', {int_or_none}),
                }))
                for season in seasons
                for episode in traverse_obj(season, ('contents', lambda _, v: v['slug']))]

            return self.playlist_result(entries, video_id, **metadata)

        # Same tolerance for podcasts: keep only the episodes that carry a file.
        podcast_episodes = traverse_obj(hydration, ('audios', lambda _, v: v['file']))
        if not podcast_episodes:
            raise ExtractorError('Could not find asset_id nor program playlist nor podcast episodes')

        return self.playlist_result([
            self.url_result(episode['file'], url_transparent=True, **traverse_obj(episode, {
                'title': 'title',
                'description': ('description', {clean_html}),
                'episode_number': ('chapter_number', {float_or_none}, {int_or_none}),
                'season_number': ('season', {int_or_none}),
            })) for episode in podcast_episodes], video_id, **metadata)
202 | ||
203 | ||
class RTVCPlayEmbedIE(RTVCPlayBaseIE):
    """Extractor for ``rtvcplay.co/embed/<id>`` player pages."""

    _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/embed/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.rtvcplay.co/embed/72b0e699-248b-4929-a4a8-3782702fa7f9',
        'md5': 'ed529aeaee7aa2a72afe91ac7d1177a8',
        'info_dict': {
            'id': '72b0e699-248b-4929-a4a8-3782702fa7f9',
            'title': 'Tráiler: Señoritas',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'ext': 'mp4',
        }
    }]

    def _real_extract(self, url):
        """Extract formats from the embedded player config, plus CMS metadata when available."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_config = self._extract_player_config(webpage, video_id)
        formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id)

        # Metadata is looked up by asset id; the request is non-fatal and is
        # skipped entirely when the player config carries no asset id.
        asset_id = traverse_obj(player_config, ('rtvcplay', 'assetid'))
        if asset_id:
            metadata = self._download_json(
                f'https://cms.rtvcplay.co/api/v1/video/asset-id/{asset_id}', video_id, fatal=False)
        else:
            metadata = {}

        details = traverse_obj(metadata, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ('image', ..., 'thumbnail', 'path'),
        }, get_all=False)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **details
        }
239 | ||
240 | ||
class RTVCKalturaIE(RTVCPlayBaseIE):
    """Extractor for live player pages under ``media.rtvc.gov.co/kalturartvc/``."""

    _VALID_URL = r'https?://media\.rtvc\.gov\.co/kalturartvc/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://media.rtvc.gov.co/kalturartvc/indexSC.html',
        'info_dict': {
            'id': 'indexSC',
            'title': r're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }]

    def _real_extract(self, url):
        """Extract a live stream from the player config and the channel's CMS entry."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_config = self._extract_player_config(webpage, video_id)
        formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id)

        # Channel details are fetched non-fatally, and only when the player
        # config names a channel id.
        channel_id = traverse_obj(player_config, ('rtvcplay', 'channelId'))
        if channel_id:
            metadata = self._download_json(
                f'https://cms.rtvcplay.co/api/v1/taxonomy_term/streaming/{channel_id}', video_id, fatal=False)
        else:
            metadata = {}

        # The channel entry may list its own HLS stream; merge it with the
        # formats already collected from the player config.
        channel_hls_url = traverse_obj(metadata, ('channel', 'hls'))
        channel_formats, channel_subtitles = self._extract_m3u8_formats_and_subtitles(
            channel_hls_url, video_id, 'mp4', fatal=False)
        formats.extend(channel_formats)
        self._merge_subtitles(channel_subtitles, target=subtitles)

        details = traverse_obj(metadata, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ('channel', 'image', 'logo', 'path'),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
            **details
        }