]>
Commit | Line | Data |
---|---|---|
91a6adde | 1 | import base64 |
10db0d2f | 2 | import io |
ac668111 | 3 | import struct |
91a6adde JMF |
4 | |
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
ce73839f | 7 | ExtractorError, |
ac668111 | 8 | determine_ext, |
f3bff94c | 9 | float_or_none, |
10db0d2f | 10 | qualities, |
2b9faf55 | 11 | remove_end, |
3e769682 | 12 | remove_start, |
5edb8dfe | 13 | try_get, |
91a6adde JMF |
14 | ) |
15 | ||
2b9faf55 | 16 | |
class RTVEALaCartaIE(InfoExtractor):
    """Extractor for rtve.es "a la carta" and filmoteca video pages.

    Media URLs are not served directly: they are hidden inside the tEXt
    chunks of a PNG file fetched from the ``ztnr/movil/thumbnail`` endpoint
    and have to be decoded with ``_decrypt_url``. That endpoint needs a
    "manager" name, which ``_real_initialize`` obtains once per extractor.
    """
    IE_NAME = 'rtve.es:alacarta'
    IE_DESC = 'RTVE a la carta'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
        'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
        'info_dict': {
            'id': '2491869',
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
            'duration': 5024.566,
            'series': 'Balonmano',
        },
        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }, {
        'note': 'Live stream',
        'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
        'info_dict': {
            'id': '1694255',
            'ext': 'mp4',
            'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': 'live stream',
        },
    }, {
        'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
        'md5': 'd850f3c8731ea53952ebab489cf81cbf',
        'info_dict': {
            'id': '4236788',
            'ext': 'mp4',
            'title': 'Servir y proteger - Capítulo 104',
            'duration': 3222.0,
        },
        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }, {
        'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
        'only_matching': True,
    }, {
        'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Fetch and cache the "manager" name used to build thumbnail URLs.

        The lookup URL embeds the base64-encoded User-Agent header that the
        downloader is configured to send.
        """
        user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode()).decode('utf-8')
        self._manager = self._download_json(
            'http://www.rtve.es/odin/loki/' + user_agent_b64,
            None, 'Fetching manager info')['manager']

    @staticmethod
    def _decrypt_url(png):
        """Yield ``(quality, url)`` pairs hidden in a base64-encoded PNG.

        The decoded data is walked chunk by chunk (4-byte big-endian length,
        4-byte type, payload, 4-byte CRC) until ``IEND``. Each ``tEXt`` chunk
        holds ``<alphabet data>\\0<quality>%%<url data>``:

        * the alphabet is built by sampling the alphabet data, leaving gaps
          of 1, 2, 3, 0, 1, ... characters between the sampled ones;
        * every URL character is encoded as a two-digit decimal index into
          that alphabet, with a varying number of filler characters between
          the tens digit and the units digit.

        Iteration stops quietly if the data ends before an ``IEND`` chunk is
        seen, instead of failing with ``struct.error`` on a short read.
        """
        # The first 8 bytes (the PNG file signature) are not chunk data
        encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
        while True:
            raw_length = encrypted_data.read(4)
            if len(raw_length) < 4:
                # Truncated stream: there is no complete chunk header left
                break
            length = struct.unpack('!I', raw_length)[0]
            chunk_type = encrypted_data.read(4)
            if chunk_type == b'IEND':
                break
            data = encrypted_data.read(length)
            if chunk_type == b'tEXt':
                alphabet_data, text = data.split(b'\0')
                quality, url_data = text.split(b'%%')

                alphabet = []
                gap = 0  # size of the gap that followed the last sampled character
                to_skip = 0  # characters still to skip before the next sample
                for char in alphabet_data.decode('iso-8859-1'):
                    if to_skip == 0:
                        alphabet.append(char)
                        to_skip = gap = (gap + 1) % 4
                    else:
                        to_skip -= 1

                url_chars = []
                have_tens = False  # True once the tens digit of an index was read
                to_skip = 3  # filler characters left before the units digit
                decoded = 1  # 1-based counter of the index currently being decoded
                index = 0
                for char in url_data.decode('iso-8859-1'):
                    if not have_tens:
                        index = int(char) * 10
                        have_tens = True
                    elif to_skip == 0:
                        index += int(char)
                        url_chars.append(alphabet[index])
                        to_skip = (decoded + 3) % 4
                        have_tens = False
                        decoded += 1
                    else:
                        to_skip -= 1

                yield quality.decode(), ''.join(url_chars)
            encrypted_data.read(4)  # CRC

    def _extract_png_formats(self, video_id):
        """Download the PNG for ``video_id`` and build the list of formats.

        HLS and DASH manifests are expanded (non-fatally); any other URL
        becomes a single format ranked by its quality label.
        """
        png = self._download_webpage(
            f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/videos/{video_id}.png',
            video_id, 'Downloading url information', query={'q': 'v2'})
        q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
        formats = []
        for quality, video_url in self._decrypt_url(png):
            ext = determine_ext(video_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    video_url, video_id, 'dash', fatal=False))
            else:
                formats.append({
                    'format_id': quality,
                    'quality': q(quality),
                    'url': video_url,
                })
        return formats

    def _real_extract(self, url):
        """Return the info dict for an "a la carta" video URL.

        Raises ExtractorError (expected) when the API reports the video in
        the 'DESPU' state.
        """
        video_id = self._match_id(url)
        info = self._download_json(
            f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json',
            video_id)['page']['items'][0]
        # .get(): an item without a 'state' key is treated as available
        if info.get('state') == 'DESPU':
            raise ExtractorError('The video is no longer available', expected=True)
        title = info['title'].strip()
        formats = self._extract_png_formats(video_id)

        subtitles = None
        sbt_file = info.get('sbtFile')
        if sbt_file:
            subtitles = self.extract_subtitles(video_id, sbt_file)

        is_live = info.get('live') is True

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': info.get('image'),
            'subtitles': subtitles,
            'duration': float_or_none(info.get('duration'), 1000),
            'is_live': is_live,
            'series': info.get('programTitle'),
        }

    def _get_subtitles(self, video_id, sub_file):
        """Map each subtitle language to a single VTT entry.

        The listing is read from ``sub_file + '.json'``.
        """
        subs = self._download_json(
            sub_file + '.json', video_id,
            'Downloading subtitles info')['page']['items']
        return {
            s['lang']: [{'ext': 'vtt', 'url': s['src']}]
            for s in subs}
169 | ||
b68eedba | 170 | |
class RTVEAudioIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
    """Extractor for rtve.es audio pages (``/alacarta/audios`` and ``/play/audios``)."""
    IE_NAME = 'rtve.es:audio'
    IE_DESC = 'RTVE audio'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'

    _TESTS = [{
        'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
        'md5': 'ae06d27bff945c4e87a50f89f6ce48ce',
        'info_dict': {
            'id': '5889192',
            'ext': 'mp3',
            'title': 'Códigos informáticos',
            'thumbnail': r're:https?://.+/1598856591583.jpg',
            'duration': 349.440,
            'series': 'A hombros de gigantes',
        },
    }, {
        'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
        'md5': '072855ab89a9450e0ba314c717fa5ebc',
        'info_dict': {
            'id': '5791165',
            'ext': 'mp3',
            'title': 'Ignatius Farray',
            'thumbnail': r're:https?://.+/1613243011863.jpg',
            'duration': 3559.559,
            'series': 'En Radio 3',
        },
    }, {
        'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
        'md5': '0eadab248cc8dd193fa5765712e84d5c',
        'info_dict': {
            'id': '6082623',
            'ext': 'mp3',
            'title': 'Capítulo 26 y último: La muerte de Victor',
            'thumbnail': r're:https?://.+/1632147445707.jpg',
            'duration': 3174.086,
            'series': 'Frankenstein o el moderno Prometeo',
        },
    }]

    def _extract_png_formats(self, audio_id):
        """Build the format list for an audio item.

        Downloads the PNG "thumbnail" associated with ``audio_id``, whose
        contents conceal the media information, and decodes it with the
        inherited ``_decrypt_url`` to obtain (quality, URL) pairs. HLS and
        DASH manifests are expanded non-fatally; every other URL becomes one
        format ranked by its quality label.
        """
        png_url = f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/audios/{audio_id}.png'
        png = self._download_webpage(
            png_url, audio_id, 'Downloading url information', query={'q': 'v2'})
        rank = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])

        formats = []
        for quality, audio_url in self._decrypt_url(png):
            ext = determine_ext(audio_url)
            if ext == 'm3u8':
                hls_formats = self._extract_m3u8_formats(
                    audio_url, audio_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False)
                formats.extend(hls_formats)
                continue
            if ext == 'mpd':
                dash_formats = self._extract_mpd_formats(
                    audio_url, audio_id, 'dash', fatal=False)
                formats.extend(dash_formats)
                continue
            # Direct media URL: a single format labelled with its quality
            formats.append({
                'format_id': quality,
                'quality': rank(quality),
                'url': audio_url,
            })
        return formats

    def _real_extract(self, url):
        """Return the info dict for an rtve.es audio URL."""
        audio_id = self._match_id(url)
        api_response = self._download_json(
            f'https://www.rtve.es/api/audios/{audio_id}.json', audio_id)
        info = api_response['page']['items'][0]

        # Metadata is read before the formats are fetched, as in the
        # original evaluation order.
        title = info['title'].strip()
        thumbnail = info.get('thumbnail')
        duration = float_or_none(info.get('duration'), 1000)
        series = try_get(info, lambda x: x['programInfo']['title'])
        formats = self._extract_png_formats(audio_id)

        return {
            'id': audio_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'series': series,
            'formats': formats,
        }
254 | ||
255 | ||
class RTVEInfantilIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
    """Extractor for rtve.es children's ("infantil") series video pages.

    Only the URL pattern and test data differ from the parent class; all
    extraction logic is inherited unchanged.
    """
    IE_NAME = 'rtve.es:infantil'
    IE_DESC = 'RTVE infantil'
    # The numeric video id is the last path component and must be followed by '/'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'

    _TESTS = [{
        'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
        'md5': '5747454717aedf9f9fdf212d1bcfc48d',
        'info_dict': {
            'id': '3040283',
            'ext': 'mp4',
            'title': 'Maneras de vivir',
            'thumbnail': r're:https?://.+/1426182947956\.JPG',
            'duration': 357.958,
        },
        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }]
d5b55939 | 273 | |
d5b55939 | 274 | |
class RTVELiveIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
    """Extractor for rtve.es live channel pages (``/directo/<channel>``)."""
    IE_NAME = 'rtve.es:live'
    IE_DESC = 'RTVE.es live streams'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'

    _TESTS = [{
        'url': 'http://www.rtve.es/directo/la-1/',
        'info_dict': {
            'id': 'la-1',
            'ext': 'mp4',
            'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            'skip_download': 'live stream',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for a live channel page.

        The channel slug from the URL is used as the result id, while the
        numeric player id scraped from the page is what the inherited
        ``_extract_png_formats`` is called with.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Strip the fixed suffix first, then the fixed prefix, from the og:title
        title = remove_start(
            remove_end(self._og_search_title(webpage), ' en directo en RTVE.es'),
            'Estoy viendo ')

        # Candidate patterns for the internal id, tried in this order
        player_id_patterns = (
            r'playerId=player([0-9]+)',
            r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
            r'data-id=["\'](\d+)',
        )
        vidplayer_id = self._search_regex(
            player_id_patterns, webpage, 'internal video ID')

        return {
            'id': video_id,
            'title': title,
            'formats': self._extract_png_formats(vidplayer_id),
            'is_live': True,
        }
233b58de JMF |
312 | |
313 | ||
class RTVETelevisionIE(InfoExtractor):
    """Extractor for rtve.es ``/television/.../<id>.shtml`` article pages.

    These pages only embed an "a la carta" video, so extraction is handed
    over to RTVEALaCartaIE through a URL result.
    """
    IE_NAME = 'rtve.es:television'
    # The dot before "shtml" is escaped so that it only matches a literal '.'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+)\.shtml'

    _TEST = {
        'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
        'info_dict': {
            'id': '3069778',
            'ext': 'mp4',
            'title': 'Documentos TV - La revolución del móvil',
            'duration': 3496.948,
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Find the embedded a la carta URL and delegate to RTVEALaCartaIE.

        Raises ExtractorError (expected) when the page embeds no such URL.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        # Accept both http and https embeds; RTVEALaCartaIE handles either scheme
        alacarta_url = self._search_regex(
            r'data-location="alacarta_videos"[^<]+url":"(https?://www\.rtve\.es/alacarta.+?)&',
            webpage, 'alacarta url', default=None)
        if alacarta_url is None:
            raise ExtractorError(
                'The webpage doesn\'t contain any video', expected=True)

        return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())
342 | return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key()) |