]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/telecaribe.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / telecaribe.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import traverse_obj
5
6
class TelecaribePlayIE(InfoExtractor):
    """Extractor for play.telecaribe.co pages.

    A page resolves either to a playlist built from the MP4 links found in
    its embedded player document, or to a single live HLS stream when the
    player document declares a ``source`` variable.
    """

    _VALID_URL = r'https?://(?:www\.)?play\.telecaribe\.co/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.play.telecaribe.co/breicok',
        'info_dict': {
            'id': 'breicok',
            'title': 'Breicok',
        },
        'playlist_count': 7,
    }, {
        'url': 'https://www.play.telecaribe.co/si-fue-gol-de-yepes',
        'info_dict': {
            'id': 'si-fue-gol-de-yepes',
            'title': 'Sí Fue Gol de Yepes',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://www.play.telecaribe.co/ciudad-futura',
        'info_dict': {
            'id': 'ciudad-futura',
            'title': 'Ciudad Futura',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.play.telecaribe.co/live',
        'info_dict': {
            'id': 'live',
            'title': r're:^Señal en vivo',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.play.telecaribe.co/liveplus',
        'info_dict': {
            'id': 'liveplus',
            'title': r're:^Señal en vivo Plus',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
        'skip': 'Geo-restricted to Colombia',
    }]

    def _download_player_webpage(self, webpage, display_id):
        """Locate and download the embedded player document for a page.

        The page id is read from ``window.firstPageId`` (or, failing that,
        from a ``pageBackground_<id>`` div). The ``<link>`` element whose id
        is ``features_<page id>`` points at a JSON document; the player URL
        is the last ``url`` value found among the entries of its
        ``props.render.compProps`` mapping.

        :param webpage: HTML of the landing page.
        :param display_id: identifier used for download/log messages.
        :return: the downloaded player document as text.
        """
        page_id = self._search_regex(
            (r'window\.firstPageId\s*=\s*["\']([^"\']+)', r'<div[^>]+id\s*=\s*"pageBackground_([^"]+)'),
            webpage, 'page_id')

        # page_id is scraped text: escape it so any regex metacharacter in it
        # is matched literally instead of altering the pattern.
        props_url = self._search_regex(
            rf'<link[^>]+href\s*=\s*"([^"]+)"[^>]+id\s*=\s*"features_{re.escape(page_id)}"',
            webpage, 'json_props_url')
        props = self._download_json(props_url, display_id)['props']['render']['compProps']

        player_urls = traverse_obj(props, (..., 'url'))
        if not player_urls:
            # Report a missing player through the extractor's error channel
            # rather than crashing with a bare IndexError on the lookup below.
            self.raise_no_formats('Unable to find player URL', video_id=display_id)

        return self._download_webpage(player_urls[-1], display_id)

    def _get_clean_title(self, title):
        """Strip the ``| Telecaribe VOD`` marker from a title.

        :param title: raw title, may be ``None``.
        :return: the trimmed title, or ``None`` if nothing is left.
        """
        return re.sub(r'\s*\|\s*Telecaribe\s*VOD', '', title or '').strip() or None

    def _real_extract(self, url):
        """Return a playlist of MP4 entries or a live-stream info dict for ``url``."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        player = self._download_player_webpage(webpage, display_id)

        # Only live pages declare a `source` variable in the player document.
        livestream_url = self._search_regex(
            r'(?:let|const|var)\s+source\s*=\s*["\']([^"\']+)', player, 'm3u8 url', default=None)

        # Both outcomes use the same cleaned OpenGraph title; compute it once.
        title = self._get_clean_title(self._og_search_title(webpage))

        if not livestream_url:
            return self.playlist_from_matches(
                re.findall(r'<a[^>]+href\s*=\s*"([^"]+\.mp4)', player), display_id, title)

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            livestream_url, display_id, 'mp4', live=True)

        return {
            'id': display_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }