jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import base64
	5	import io
	6	import sys
	7
	8	from .common import InfoExtractor
	9	from ..compat import (
	10	compat_b64decode,
	11	compat_struct_unpack,
	12	)
	13	from ..utils import (
	14	determine_ext,
	15	ExtractorError,
	16	float_or_none,
	17	qualities,
	18	remove_end,
	19	remove_start,
	20	try_get,
	21	)
	22
	23	_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))
	24
	25
	26	class RTVEALaCartaIE(InfoExtractor):
	27	IE_NAME = 'rtve.es:alacarta'
	28	IE_DESC = 'RTVE a la carta'
	29	_VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos\|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
	30
	31	_TESTS = [{
	32	'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
	33	'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
	34	'info_dict': {
	35	'id': '2491869',
	36	'ext': 'mp4',
	37	'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
	38	'duration': 5024.566,
	39	'series': 'Balonmano',
	40	},
	41	'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
	42	}, {
	43	'note': 'Live stream',
	44	'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
	45	'info_dict': {
	46	'id': '1694255',
	47	'ext': 'mp4',
	48	'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
	49	'is_live': True,
	50	},
	51	'params': {
	52	'skip_download': 'live stream',
	53	},
	54	}, {
	55	'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
	56	'md5': 'd850f3c8731ea53952ebab489cf81cbf',
	57	'info_dict': {
	58	'id': '4236788',
	59	'ext': 'mp4',
	60	'title': 'Servir y proteger - Capítulo 104',
	61	'duration': 3222.0,
	62	},
	63	'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
	64	}, {
	65	'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
	66	'only_matching': True,
	67	}, {
	68	'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
	69	'only_matching': True,
	70	}]
	71
	72	def _real_initialize(self):
	73	user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode('utf-8')).decode('utf-8')
	74	self._manager = self._download_json(
	75	'http://www.rtve.es/odin/loki/' + user_agent_b64,
	76	None, 'Fetching manager info')['manager']
	77
	78	@staticmethod
	79	def _decrypt_url(png):
	80	encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
	81	while True:
	82	length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
	83	chunk_type = encrypted_data.read(4)
	84	if chunk_type == b'IEND':
	85	break
	86	data = encrypted_data.read(length)
	87	if chunk_type == b'tEXt':
	88	alphabet_data, text = data.split(b'\0')
	89	quality, url_data = text.split(b'%%')
	90	alphabet = []
	91	e = 0
	92	d = 0
	93	for l in _bytes_to_chr(alphabet_data):
	94	if d == 0:
	95	alphabet.append(l)
	96	d = e = (e + 1) % 4
	97	else:
	98	d -= 1
	99	url = ''
	100	f = 0
	101	e = 3
	102	b = 1
	103	for letter in _bytes_to_chr(url_data):
	104	if f == 0:
	105	l = int(letter) * 10
	106	f = 1
	107	else:
	108	if e == 0:
	109	l += int(letter)
	110	url += alphabet[l]
	111	e = (b + 3) % 4
	112	f = 0
	113	b += 1
	114	else:
	115	e -= 1
	116
	117	yield quality.decode(), url
	118	encrypted_data.read(4) # CRC
	119
	120	def _extract_png_formats(self, video_id):
	121	png = self._download_webpage(
	122	'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
	123	video_id, 'Downloading url information', query={'q': 'v2'})
	124	q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
	125	formats = []
	126	for quality, video_url in self._decrypt_url(png):
	127	ext = determine_ext(video_url)
	128	if ext == 'm3u8':
	129	formats.extend(self._extract_m3u8_formats(
	130	video_url, video_id, 'mp4', 'm3u8_native',
	131	m3u8_id='hls', fatal=False))
	132	elif ext == 'mpd':
	133	formats.extend(self._extract_mpd_formats(
	134	video_url, video_id, 'dash', fatal=False))
	135	else:
	136	formats.append({
	137	'format_id': quality,
	138	'quality': q(quality),
	139	'url': video_url,
	140	})
	141	self._sort_formats(formats)
	142	return formats
	143
	144	def _real_extract(self, url):
	145	video_id = self._match_id(url)
	146	info = self._download_json(
	147	'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
	148	video_id)['page']['items'][0]
	149	if info['state'] == 'DESPU':
	150	raise ExtractorError('The video is no longer available', expected=True)
	151	title = info['title'].strip()
	152	formats = self._extract_png_formats(video_id)
	153
	154	subtitles = None
	155	sbt_file = info.get('sbtFile')
	156	if sbt_file:
	157	subtitles = self.extract_subtitles(video_id, sbt_file)
	158
	159	is_live = info.get('live') is True
	160
	161	return {
	162	'id': video_id,
	163	'title': title,
	164	'formats': formats,
	165	'thumbnail': info.get('image'),
	166	'subtitles': subtitles,
	167	'duration': float_or_none(info.get('duration'), 1000),
	168	'is_live': is_live,
	169	'series': info.get('programTitle'),
	170	}
	171
	172	def _get_subtitles(self, video_id, sub_file):
	173	subs = self._download_json(
	174	sub_file + '.json', video_id,
	175	'Downloading subtitles info')['page']['items']
	176	return dict(
	177	(s['lang'], [{'ext': 'vtt', 'url': s['src']}])
	178	for s in subs)
	179
	180
	181	class RTVEAudioIE(RTVEALaCartaIE):
	182	IE_NAME = 'rtve.es:audio'
	183	IE_DESC = 'RTVE audio'
	184	_VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta\|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'
	185
	186	_TESTS = [{
	187	'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
	188	'md5': 'ae06d27bff945c4e87a50f89f6ce48ce',
	189	'info_dict': {
	190	'id': '5889192',
	191	'ext': 'mp3',
	192	'title': 'Códigos informáticos',
	193	'thumbnail': r're:https?://.+/1598856591583.jpg',
	194	'duration': 349.440,
	195	'series': 'A hombros de gigantes',
	196	},
	197	}, {
	198	'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
	199	'md5': '072855ab89a9450e0ba314c717fa5ebc',
	200	'info_dict': {
	201	'id': '5791165',
	202	'ext': 'mp3',
	203	'title': 'Ignatius Farray',
	204	'thumbnail': r're:https?://.+/1613243011863.jpg',
	205	'duration': 3559.559,
	206	'series': 'En Radio 3'
	207	},
	208	}, {
	209	'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
	210	'md5': '0eadab248cc8dd193fa5765712e84d5c',
	211	'info_dict': {
	212	'id': '6082623',
	213	'ext': 'mp3',
	214	'title': 'Capítulo 26 y último: La muerte de Victor',
	215	'thumbnail': r're:https?://.+/1632147445707.jpg',
	216	'duration': 3174.086,
	217	'series': 'Frankenstein o el moderno Prometeo'
	218	},
	219	}]
	220
	221	def _extract_png_formats(self, audio_id):
	222	"""
	223	This function retrieves media related png thumbnail which obfuscate
	224	valuable information about the media. This information is decrypted
	225	via base class _decrypt_url function providing media quality and
	226	media url
	227	"""
	228	png = self._download_webpage(
	229	'http://www.rtve.es/ztnr/movil/thumbnail/%s/audios/%s.png' %
	230	(self._manager, audio_id),
	231	audio_id, 'Downloading url information', query={'q': 'v2'})
	232	q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
	233	formats = []
	234	for quality, audio_url in self._decrypt_url(png):
	235	ext = determine_ext(audio_url)
	236	if ext == 'm3u8':
	237	formats.extend(self._extract_m3u8_formats(
	238	audio_url, audio_id, 'mp4', 'm3u8_native',
	239	m3u8_id='hls', fatal=False))
	240	elif ext == 'mpd':
	241	formats.extend(self._extract_mpd_formats(
	242	audio_url, audio_id, 'dash', fatal=False))
	243	else:
	244	formats.append({
	245	'format_id': quality,
	246	'quality': q(quality),
	247	'url': audio_url,
	248	})
	249	self._sort_formats(formats)
	250	return formats
	251
	252	def _real_extract(self, url):
	253	audio_id = self._match_id(url)
	254	info = self._download_json(
	255	'https://www.rtve.es/api/audios/%s.json' % audio_id,
	256	audio_id)['page']['items'][0]
	257
	258	return {
	259	'id': audio_id,
	260	'title': info['title'].strip(),
	261	'thumbnail': info.get('thumbnail'),
	262	'duration': float_or_none(info.get('duration'), 1000),
	263	'series': try_get(info, lambda x: x['programInfo']['title']),
	264	'formats': self._extract_png_formats(audio_id),
	265	}
	266
	267
	268	class RTVEInfantilIE(RTVEALaCartaIE):
	269	IE_NAME = 'rtve.es:infantil'
	270	IE_DESC = 'RTVE infantil'
	271	_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
	272
	273	_TESTS = [{
	274	'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
	275	'md5': '5747454717aedf9f9fdf212d1bcfc48d',
	276	'info_dict': {
	277	'id': '3040283',
	278	'ext': 'mp4',
	279	'title': 'Maneras de vivir',
	280	'thumbnail': r're:https?://.+/1426182947956\.JPG',
	281	'duration': 357.958,
	282	},
	283	'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
	284	}]
	285
	286
	287	class RTVELiveIE(RTVEALaCartaIE):
	288	IE_NAME = 'rtve.es:live'
	289	IE_DESC = 'RTVE.es live streams'
	290	_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
	291
	292	_TESTS = [{
	293	'url': 'http://www.rtve.es/directo/la-1/',
	294	'info_dict': {
	295	'id': 'la-1',
	296	'ext': 'mp4',
	297	'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
	298	},
	299	'params': {
	300	'skip_download': 'live stream',
	301	}
	302	}]
	303
	304	def _real_extract(self, url):
	305	mobj = self._match_valid_url(url)
	306	video_id = mobj.group('id')
	307
	308	webpage = self._download_webpage(url, video_id)
	309	title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
	310	title = remove_start(title, 'Estoy viendo ')
	311
	312	vidplayer_id = self._search_regex(
	313	(r'playerId=player([0-9]+)',
	314	r'class=["\'].?\blive_mod\b.?["\'][^>]+data-assetid=["\'](\d+)',
	315	r'data-id=["\'](\d+)'),
	316	webpage, 'internal video ID')
	317
	318	return {
	319	'id': video_id,
	320	'title': title,
	321	'formats': self._extract_png_formats(vidplayer_id),
	322	'is_live': True,
	323	}
	324
	325
	326	class RTVETelevisionIE(InfoExtractor):
	327	IE_NAME = 'rtve.es:television'
	328	_VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'
	329
	330	_TEST = {
	331	'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
	332	'info_dict': {
	333	'id': '3069778',
	334	'ext': 'mp4',
	335	'title': 'Documentos TV - La revolución del móvil',
	336	'duration': 3496.948,
	337	},
	338	'params': {
	339	'skip_download': True,
	340	},
	341	}
	342
	343	def _real_extract(self, url):
	344	page_id = self._match_id(url)
	345	webpage = self._download_webpage(url, page_id)
	346
	347	alacarta_url = self._search_regex(
	348	r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&',
	349	webpage, 'alacarta url', default=None)
	350	if alacarta_url is None:
	351	raise ExtractorError(
	352	'The webpage doesn\'t contain any video', expected=True)
	353
	354	return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())