]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/rtvcplay.py
3 from .common
import InfoExtractor
, ExtractorError
17 class RTVCPlayBaseIE(InfoExtractor
):
18 _BASE_VALID_URL
= r
'https?://(?:www\.)?rtvcplay\.co'
def _extract_player_config(self, webpage, video_id):
    """Locate and parse the player's ``config`` object embedded in a page.

    The page text is first stripped of every ``" + "`` sequence (a double
    quote, optional whitespace, a plus sign, optional whitespace, a double
    quote), which fuses string literals that the page built by
    concatenation into single literals.

    The cleaned text is then handed to ``self._search_json`` together with
    a pattern matching an inline ``<script>`` tag that declares ``config``
    via ``var``, ``let`` or ``const``.

    NOTE(review): the exact return value and failure behaviour come from
    ``_search_json`` (defined on the base class, not visible here) --
    presumably the decoded object; confirm against that helper.

    :param webpage: HTML source of the player page.
    :param video_id: identifier forwarded to ``_search_json``.
    :return: whatever ``_search_json`` returns for the matched ``config``.
    """
    # Opening of an inline script up to and including "config =".
    config_start = r'<script\b[^>]*>[^<]*(?:var|let|const)\s+config\s*='

    # Drop the `" + "` joints so split string literals become one literal.
    concat_joint = r'"\s*\+\s*"'
    flattened_page = re.sub(concat_joint, '', webpage)

    # js_to_json is applied to the matched source before it is decoded.
    player_config = self._search_json(
        config_start, flattened_page, 'player_config', video_id,
        transform_source=js_to_json)
    return player_config
25 def _extract_formats_and_subtitles_player_config(self
, player_config
, video_id
):
26 formats
, subtitles
= [], {}
27 for source
in traverse_obj(player_config
, ('sources', ..., lambda _
, v
: url_or_none(v
['url']))):
28 ext
= mimetype2ext(source
.get('mimetype'), default
=determine_ext(source
['url']))
30 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
31 source
['url'], video_id
, 'mp4', fatal
=False)
33 self
._merge
_subtitles
(subs
, target
=subtitles
)
40 return formats
, subtitles
43 class RTVCPlayIE(RTVCPlayBaseIE
):
44 _VALID_URL
= RTVCPlayBaseIE
._BASE
_VALID
_URL
+ r
'/(?P<category>(?!embed)[^/]+)/(?:[^?#]+/)?(?P<id>[\w-]+)'
47 'url': 'https://www.rtvcplay.co/en-vivo/canal-institucional',
49 'id': 'canal-institucional',
50 'title': r
're:^Canal Institucional',
51 'description': 'md5:eff9e548394175928059320c006031ea',
52 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
53 'live_status': 'is_live',
57 'skip_download': 'Livestream',
60 'url': 'https://www.rtvcplay.co/en-vivo/senal-colombia',
62 'id': 'senal-colombia',
63 'title': r
're:^Señal Colombia',
64 'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
65 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
66 'live_status': 'is_live',
70 'skip_download': 'Livestream',
73 'url': 'https://www.rtvcplay.co/en-vivo/radio-nacional',
75 'id': 'radio-nacional',
76 'title': r
're:^Radio Nacional',
77 'description': 'md5:5de009bc6a9fa79d2a6cf0b73f977d53',
78 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
79 'live_status': 'is_live',
83 'skip_download': 'Livestream',
86 'url': 'https://www.rtvcplay.co/peliculas-ficcion/senoritas',
87 'md5': '1288ee6f6d1330d880f98bff2ed710a3',
91 'description': 'md5:f095a2bb52cb6cf279daf6302f86fb32',
92 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
96 'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa/james-regresa-clases-28022022',
97 'md5': 'f040a7380a269ad633cf837384d5e9fc',
99 'id': 'james-regresa-clases-28022022',
100 'title': 'James regresa a clases - 28/02/2022',
101 'description': 'md5:c5dcdf757c7ab29305e8763c6007e675',
105 'url': 'https://www.rtvcplay.co/peliculas-documentales/llinas-el-cerebro-y-el-universo',
107 'id': 'llinas-el-cerebro-y-el-universo',
108 'title': 'Llinás, el cerebro y el universo',
109 'description': 'md5:add875bf2309bb52b3e8b9b06116d9b0',
110 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
112 'playlist_mincount': 3,
114 'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa',
116 'id': 'profe-en-tu-casa',
117 'title': 'Profe en tu casa',
118 'description': 'md5:47dbe20e263194413b1db2a2805a4f2e',
119 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
121 'playlist_mincount': 537,
123 'url': 'https://www.rtvcplay.co/series-al-oido/relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
125 'id': 'relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
126 'title': 'Relato de un náufrago: una travesía del periodismo a la literatura',
127 'description': 'md5:6da28fdca4a5a568ea47ef65ef775603',
128 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
130 'playlist_mincount': 5,
132 'url': 'https://www.rtvcplay.co/series-al-oido/diez-versiones',
134 'id': 'diez-versiones',
135 'title': 'Diez versiones',
136 'description': 'md5:997471ed971cb3fd8e41969457675306',
137 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
139 'playlist_mincount': 20,
142 def _real_extract(self
, url
):
143 video_id
, category
= self
._match
_valid
_url
(url
).group('id', 'category')
144 webpage
= self
._download
_webpage
(url
, video_id
)
146 hydration
= self
._search
_json
(
147 r
'window\.__RTVCPLAY_STATE__\s*=', webpage
, 'hydration',
148 video_id
, transform_source
=js_to_json
)['content']['currentContent']
150 asset_id
= traverse_obj(hydration
, ('video', 'assetid'))
152 hls_url
= hydration
['base_url_hls'].replace('[node:field_asset_id]', asset_id
)
154 hls_url
= traverse_obj(hydration
, ('channel', 'hls'))
156 metadata
= traverse_obj(hydration
, {
158 'description': 'description',
159 'thumbnail': ((('channel', 'image', 'logo'), ('resource', 'image', 'cover_desktop')), 'path'),
162 # Probably it's a program's page
164 seasons
= traverse_obj(
165 hydration
, ('widgets', lambda _
, y
: y
['type'] == 'seasonList', 'contents'),
168 podcast_episodes
= hydration
.get('audios')
169 if not podcast_episodes
:
170 raise ExtractorError('Could not find asset_id nor program playlist nor podcast episodes')
172 return self
.playlist_result([
173 self
.url_result(episode
['file'], url_transparent
=True, **traverse_obj(episode
, {
175 'description': ('description', {clean_html}
),
176 'episode_number': ('chapter_number', {float_or_none}
, {int_or_none}
),
177 'season_number': ('season', {int_or_none}
),
178 })) for episode
in podcast_episodes
], video_id
, **metadata
)
180 entries
= [self
.url_result(
181 urljoin(url
, episode
['slug']), url_transparent
=True,
182 **traverse_obj(season
, {
184 'season_number': ('season', {int_or_none}
),
185 }), **traverse_obj(episode
, {
187 'thumbnail': ('image', 'cover', 'path'),
188 'episode_number': ('chapter_number', {int_or_none}
),
189 })) for season
in seasons
for episode
in traverse_obj(season
, ('contents', ...))]
191 return self
.playlist_result(entries
, video_id
, **metadata
)
193 formats
, subtitles
= self
._extract
_m
3u8_formats
_and
_subtitles
(hls_url
, video_id
, 'mp4')
198 'subtitles': subtitles
,
199 'is_live': category
== 'en-vivo',
204 class RTVCPlayEmbedIE(RTVCPlayBaseIE
):
205 _VALID_URL
= RTVCPlayBaseIE
._BASE
_VALID
_URL
+ r
'/embed/(?P<id>[\w-]+)'
208 'url': 'https://www.rtvcplay.co/embed/72b0e699-248b-4929-a4a8-3782702fa7f9',
209 'md5': 'ed529aeaee7aa2a72afe91ac7d1177a8',
211 'id': '72b0e699-248b-4929-a4a8-3782702fa7f9',
212 'title': 'Tráiler: Señoritas',
213 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
218 def _real_extract(self
, url
):
219 video_id
= self
._match
_id
(url
)
220 webpage
= self
._download
_webpage
(url
, video_id
)
222 player_config
= self
._extract
_player
_config
(webpage
, video_id
)
223 formats
, subtitles
= self
._extract
_formats
_and
_subtitles
_player
_config
(player_config
, video_id
)
225 asset_id
= traverse_obj(player_config
, ('rtvcplay', 'assetid'))
226 metadata
= {} if not asset_id
else self
._download
_json
(
227 f
'https://cms.rtvcplay.co/api/v1/video/asset-id/{asset_id}', video_id
, fatal
=False)
232 'subtitles': subtitles
,
233 **traverse_obj(metadata
, {
235 'description': 'description',
236 'thumbnail': ('image', ..., 'thumbnail', 'path'),
241 class RTVCKalturaIE(RTVCPlayBaseIE
):
242 _VALID_URL
= r
'https?://media\.rtvc\.gov\.co/kalturartvc/(?P<id>[\w-]+)'
245 'url': 'https://media.rtvc.gov.co/kalturartvc/indexSC.html',
248 'title': r
're:^Señal Colombia',
249 'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
250 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
251 'live_status': 'is_live',
255 'skip_download': 'Livestream',
259 def _real_extract(self
, url
):
260 video_id
= self
._match
_id
(url
)
261 webpage
= self
._download
_webpage
(url
, video_id
)
263 player_config
= self
._extract
_player
_config
(webpage
, video_id
)
264 formats
, subtitles
= self
._extract
_formats
_and
_subtitles
_player
_config
(player_config
, video_id
)
266 channel_id
= traverse_obj(player_config
, ('rtvcplay', 'channelId'))
267 metadata
= {} if not channel_id
else self
._download
_json
(
268 f
'https://cms.rtvcplay.co/api/v1/taxonomy_term/streaming/{channel_id}', video_id
, fatal
=False)
270 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
271 traverse_obj(metadata
, ('channel', 'hls')), video_id
, 'mp4', fatal
=False)
273 self
._merge
_subtitles
(subs
, target
=subtitles
)
278 'subtitles': subtitles
,
280 **traverse_obj(metadata
, {
282 'description': 'description',
283 'thumbnail': ('channel', 'image', 'logo', 'path'),