]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/mediastream.py
[ie/Canal1,CaracolTvPlay] Add extractors (#7151)
[yt-dlp.git] / yt_dlp / extractor / mediastream.py
CommitLineData
3d79ebc8
E
1import re
2
3from .common import InfoExtractor
2d5a8c5d 4from ..utils import (
03025b6e 5 clean_html,
2d5a8c5d 6 remove_end,
2d5a8c5d 7 traverse_obj,
8 urljoin,
9)
3d79ebc8
E
10
11
class MediaStreamBaseIE(InfoExtractor):
    """Base class holding the MediaStream (mdstrm.com) URL constants and embed discovery."""

    _EMBED_BASE_URL = 'https://mdstrm.com/embed'
    _BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'

    def _extract_mediastream_urls(self, webpage):
        """Yield MediaStream player URLs found in ``webpage``.

        Candidates are produced in this order:
          1. ``embedUrl`` / ``contentUrl`` of JSON-LD ``VideoObject`` entries
             that match ``_BASE_URL_RE``,
          2. ``playerMdStream.mdstreamVideo(...)`` calls inside ``<script>`` tags,
          3. ``<iframe>`` elements whose ``src`` matches ``_BASE_URL_RE``,
          4. ``<div>`` / ``<ps-mediastream>`` elements with a
             ``MediaStreamVideoPlayer`` class and a ``data-video-id`` attribute.
        """
        def _keep_player_url(candidate):
            # Drop JSON-LD URLs that do not point at a MediaStream player.
            return candidate if re.match(rf'{self._BASE_URL_RE}/\w+', candidate) else None

        json_ld_entries = list(self._yield_json_ld(webpage, None, fatal=False))
        yield from traverse_obj(json_ld_entries, (
            lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
            {_keep_player_url}))

        # Inline player bootstrap: only the video id is present, so build the embed URL.
        script_call_re = r'<script[^>]+>[^>]*playerMdStream\.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)'
        yield from (
            f'{self._EMBED_BASE_URL}/{script_match.group("video_id")}'
            for script_match in re.finditer(script_call_re, webpage))

        # Plain iframe embeds already carry the complete player URL.
        iframe_re = rf'<iframe[^>]+\bsrc="({self._BASE_URL_RE}/\w+)'
        yield from (iframe_match.group(1) for iframe_match in re.finditer(iframe_re, webpage))

        player_element_re = r'''(?x)
            <(?:div|ps-mediastream)[^>]+
            (class="[^"]*MediaStreamVideoPlayer)[^"]*"[^>]+
            data-video-id="(?P<video_id>\w+)"
            (?:\s*data-video-type="(?P<video_type>[^"]+))?
            (?:[^>]*>\s*<div[^>]+\1[^"]*"[^>]+data-mediastream=["\'][^>]+
                https://mdstrm\.com/(?P<live>live-stream))?
            '''
        for element in re.finditer(player_element_re, webpage):
            # Either an explicit data-video-type="live" or a nested live-stream
            # URL marks the player as a live stream; everything else is an embed.
            if element.group('video_type') == 'live' or element.group('live'):
                url_kind = 'live-stream'
            else:
                url_kind = 'embed'
            yield f'https://mdstrm.com/{url_kind}/{element.group("video_id")}'
39
40
class MediaStreamIE(MediaStreamBaseIE):
    """Extractor for mdstrm.com ``embed`` and ``live-stream`` player pages."""

    _VALID_URL = rf'{MediaStreamBaseIE._BASE_URL_RE}/(?P<id>\w+)'

    _TESTS = [{
        'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831',
        'md5': '97b4f2634b8e8612cc574dfcd504df05',
        'info_dict': {
            'id': '6318e3f1d1d316083ae48831',
            'title': 'Video: Así fue el despido de Thomas Tuchel del Chelsea',
            'description': 'md5:358ce1e1396010d50a1ece1be3633c95',
            'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    _WEBPAGE_TESTS = [{
        'url': 'https://www.multimedios.com/video/costa-rica-tv-en-vivo/v2616',
        'info_dict': {
            'id': '5a7b1e63a8da282c34d65445',
            'title': 're:mmtv-costarica',
            'description': 'mmtv-costarica',
            'thumbnail': 're:^https?://[^?#]+5a7b1e63a8da282c34d65445',
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {'skip_download': 'Livestream'},
    }, {
        'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas',
        'md5': 'de31f0b1ecc321fb35bf22d58734ea40',
        'info_dict': {
            'id': '63731bab8ec9b308a2c9ed28',
            'title': 'Clases de llaves y castigos ¿Quién sabe más?',
            'description': 'md5:1b49aa1ee5a4b32fbd66104b2d629e9d',
            'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120',
        'info_dict': {
            'id': '63756df1c638b008a5659dec',
            'title': 'Facundo González sufrió fuerte golpe durante competencia frente a Hugo García en EEG',
            'description': 'md5:9490c034264afd756eef7b2c3adee69e',
            'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083',
        'info_dict': {
            'id': '637307669609130f74cd3a6e',
            'title': 'Las Nuevas Lomas Town: Bernardo De La Mata se enfrentó a sujeto para luchar por el amor de Macarena',
            'description': 'md5:60d71772f1e1496923539ae58aa17124',
            'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _extract_from_webpage(self, url, webpage):
        """Yield a URL result for every MediaStream player URL found in ``webpage``."""
        yield from (
            self.url_result(player_url, MediaStreamIE, None)
            for player_url in self._extract_mediastream_urls(webpage))

    def _real_extract(self, url):
        """Download the player page and build the info dict from its embedded config.

        Formats come from the ``src`` mapping of the ``window.MDSTRM.OPTIONS``
        JSON object; title, description and thumbnail come from OpenGraph tags,
        with the config's ``title`` as a fallback for the title.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The player page embeds one of these notices when playback is geo-blocked.
        geo_block_notices = (
            'Debido a tu ubicación no puedes ver el contenido',
            'You are not allowed to watch this video: Geo Fencing Restriction',
        )
        if any(notice in webpage for notice in geo_block_notices):
            self.raise_geo_restricted()

        player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)

        # Manifest kinds that need expanding into individual formats.
        manifest_extractors = {
            'hls': self._extract_m3u8_formats_and_subtitles,
            'mpd': self._extract_mpd_formats_and_subtitles,
        }

        formats, subtitles = [], {}
        sources = player_config['src']
        for source_kind in sources:
            source_url = sources[source_kind]
            extract_manifest = manifest_extractors.get(source_kind)
            if extract_manifest is None:
                # Any other kind is used as a direct media URL.
                formats.append({
                    'url': source_url,
                })
                continue
            manifest_formats, manifest_subtitles = extract_manifest(source_url, video_id)
            formats.extend(manifest_formats)
            self._merge_subtitles(manifest_subtitles, target=subtitles)

        title = self._og_search_title(webpage) or player_config.get('title')
        description = self._og_search_description(webpage)
        is_live = player_config.get('type') == 'live'
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': is_live,
            'thumbnail': thumbnail,
        }
142
143
class WinSportsVideoIE(MediaStreamBaseIE):
    """Extractor for winsports.co video pages, which delegate playback to MediaStream."""

    _VALID_URL = r'https?://www\.winsports\.co/videos/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.winsports.co/videos/siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
        'info_dict': {
            'id': '62dc8357162c4b0821fcfb3c',
            'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
            'title': '¡Siempre Castellanos! Gran atajada del portero \'cardenal\' para evitar la caída de su arco',
            'description': 'md5:eb811b2b2882bdc59431732c06b905f2',
            'thumbnail': r're:^https?://[^?#]+62dc8357162c4b0821fcfb3c',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.winsports.co/videos/observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
        'info_dict': {
            'id': '62dcb875ef12a5526790b552',
            'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
            'title': 'Observa aquí los goles del empate entre Tolima y Nacional',
            'description': 'md5:b19402ba6e46558b93fd24b873eea9c9',
            'thumbnail': r're:^https?://[^?#]+62dcb875ef12a5526790b552',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.winsports.co/videos/equidad-vuelve-defender-su-arco-de-remates-de-junior',
        'info_dict': {
            'id': '63fa7eca72f1741ad3a4d515',
            'display_id': 'equidad-vuelve-defender-su-arco-de-remates-de-junior',
            'title': '⚽ Equidad vuelve a defender su arco de remates de Junior',
            'description': 'Remate de Sierra',
            'thumbnail': r're:^https?://[^?#]+63fa7eca72f1741ad3a4d515',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.winsports.co/videos/bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
        'info_dict': {
            'id': '6402adb62bbf3b18d454e1b0',
            'display_id': 'bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
            'title': '⚽Bucaramanga se quedó con el grito de gol en la garganta',
            'description': 'Gol anulado Bucaramanga',
            'thumbnail': r're:^https?://[^?#]+6402adb62bbf3b18d454e1b0',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Resolve the page's MediaStream player and hand extraction over to ``MediaStreamIE``.

        The player reference is looked up first in the page's Drupal settings
        JSON (``settings.mediastream_formatter.*.mediastream_id``, then a
        top-level ``url``); when neither is present, the first URL found by
        ``_extract_mediastream_urls`` is used instead.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        data = self._search_json(
            r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'data', display_id)

        drupal_paths = (('settings', 'mediastream_formatter', ..., 'mediastream_id'), 'url')
        player_ref = traverse_obj(data, (drupal_paths, {str}), get_all=False)
        if not player_ref:
            # Nothing usable in the Drupal settings: fall back to scanning the HTML.
            player_ref = next(self._extract_mediastream_urls(webpage), None)
        mediastream_url = urljoin(f'{self._EMBED_BASE_URL}/', player_ref)

        if not mediastream_url:
            self.raise_no_formats('No MediaStream embed found in webpage')

        # Prefer the JSON-LD title, then OpenGraph; strip the site-name suffix.
        json_ld_info = self._search_json_ld(
            webpage, display_id, expected_type='VideoObject', default={})
        raw_title = json_ld_info.get('title') or self._og_search_title(webpage)
        title = clean_html(remove_end(raw_title, '| Win Sports'))

        return self.url_result(
            mediastream_url, MediaStreamIE, display_id,
            url_transparent=True, display_id=display_id, video_title=title)