jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	import re
	2
	3	from .common import InfoExtractor
	4	from ..compat import compat_HTTPError
	5	from ..utils import (
	6	float_or_none,
	7	parse_iso8601,
	8	str_or_none,
	9	try_get,
	10	unescapeHTML,
	11	url_or_none,
	12	ExtractorError,
	13	)
	14
	15
	16	class RteBaseIE(InfoExtractor):
	17	def _real_extract(self, url):
	18	item_id = self._match_id(url)
	19
	20	info_dict = {}
	21	formats = []
	22
	23	ENDPOINTS = (
	24	'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=',
	25	'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=',
	26	)
	27
	28	for num, ep_url in enumerate(ENDPOINTS, start=1):
	29	try:
	30	data = self._download_json(ep_url + item_id, item_id)
	31	except ExtractorError as ee:
	32	if num < len(ENDPOINTS) or formats:
	33	continue
	34	if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
	35	error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
	36	if error_info:
	37	raise ExtractorError(
	38	'%s said: %s' % (self.IE_NAME, error_info['message']),
	39	expected=True)
	40	raise
	41
	42	# NB the string values in the JSON are stored using XML escaping(!)
	43	show = try_get(data, lambda x: x['shows'][0], dict)
	44	if not show:
	45	continue
	46
	47	if not info_dict:
	48	title = unescapeHTML(show['title'])
	49	description = unescapeHTML(show.get('description'))
	50	thumbnail = show.get('thumbnail')
	51	duration = float_or_none(show.get('duration'), 1000)
	52	timestamp = parse_iso8601(show.get('published'))
	53	info_dict = {
	54	'id': item_id,
	55	'title': title,
	56	'description': description,
	57	'thumbnail': thumbnail,
	58	'timestamp': timestamp,
	59	'duration': duration,
	60	}
	61
	62	mg = try_get(show, lambda x: x['media:group'][0], dict)
	63	if not mg:
	64	continue
	65
	66	if mg.get('url'):
	67	m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
	68	if m:
	69	m = m.groupdict()
	70	formats.append({
	71	'url': m['url'] + '/' + m['app'],
	72	'app': m['app'],
	73	'play_path': m['playpath'],
	74	'player_url': url,
	75	'ext': 'flv',
	76	'format_id': 'rtmp',
	77	})
	78
	79	if mg.get('hls_server') and mg.get('hls_url'):
	80	formats.extend(self._extract_m3u8_formats(
	81	mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
	82	entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
	83
	84	if mg.get('hds_server') and mg.get('hds_url'):
	85	formats.extend(self._extract_f4m_formats(
	86	mg['hds_server'] + mg['hds_url'], item_id,
	87	f4m_id='hds', fatal=False))
	88
	89	mg_rte_server = str_or_none(mg.get('rte:server'))
	90	mg_url = str_or_none(mg.get('url'))
	91	if mg_rte_server and mg_url:
	92	hds_url = url_or_none(mg_rte_server + mg_url)
	93	if hds_url:
	94	formats.extend(self._extract_f4m_formats(
	95	hds_url, item_id, f4m_id='hds', fatal=False))
	96
	97	info_dict['formats'] = formats
	98	return info_dict
	99
	100
	101	class RteIE(RteBaseIE):
	102	IE_NAME = 'rte'
	103	IE_DESC = 'Raidió Teilifís Éireann TV'
	104	_VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
	105	_TEST = {
	106	'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
	107	'md5': '4a76eb3396d98f697e6e8110563d2604',
	108	'info_dict': {
	109	'id': '10478715',
	110	'ext': 'mp4',
	111	'title': 'iWitness',
	112	'thumbnail': r're:^https?://.*\.jpg$',
	113	'description': 'The spirit of Ireland, one voice and one minute at a time.',
	114	'duration': 60.046,
	115	'upload_date': '20151012',
	116	'timestamp': 1444694160,
	117	},
	118	}
	119
	120
	121	class RteRadioIE(RteBaseIE):
	122	IE_NAME = 'rte:radio'
	123	IE_DESC = 'Raidió Teilifís Éireann radio'
	124	# Radioplayer URLs have two distinct specifier formats,
	125	# the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
	126	# the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
	127	# where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
	128	# An <id> uniquely defines an individual recording, and is the only part we require.
	129	_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A\|:\|%5F\|_)(?P<id>[0-9]+)'
	130
	131	_TESTS = [{
	132	# Old-style player URL; HLS and RTMPE formats
	133	'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
	134	'md5': 'c79ccb2c195998440065456b69760411',
	135	'info_dict': {
	136	'id': '10507902',
	137	'ext': 'mp4',
	138	'title': 'Gloria',
	139	'thumbnail': r're:^https?://.*\.jpg$',
	140	'description': 'md5:9ce124a7fb41559ec68f06387cabddf0',
	141	'timestamp': 1451203200,
	142	'upload_date': '20151227',
	143	'duration': 7230.0,
	144	},
	145	}, {
	146	# New-style player URL; RTMPE formats only
	147	'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
	148	'info_dict': {
	149	'id': '3250678',
	150	'ext': 'flv',
	151	'title': 'The Lyric Concert with Paul Herriott',
	152	'thumbnail': r're:^https?://.*\.jpg$',
	153	'description': '',
	154	'timestamp': 1333742400,
	155	'upload_date': '20120406',
	156	'duration': 7199.016,
	157	},
	158	'params': {
	159	# rtmp download
	160	'skip_download': True,
	161	},
	162	}]