jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	import re
	2
	3	from .common import InfoExtractor
	4	from ..compat import (
	5	compat_str,
	6	compat_urlparse,
	7	)
	8	from ..utils import (
	9	determine_ext,
	10	ExtractorError,
	11	filter_dict,
	12	find_xpath_attr,
	13	fix_xml_ampersands,
	14	GeoRestrictedError,
	15	HEADRequest,
	16	int_or_none,
	17	join_nonempty,
	18	parse_duration,
	19	remove_start,
	20	strip_or_none,
	21	traverse_obj,
	22	try_get,
	23	unified_strdate,
	24	unified_timestamp,
	25	update_url_query,
	26	urljoin,
	27	xpath_text,
	28	)
	29
	30
	31	class RaiBaseIE(InfoExtractor):
	32	_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
	33	_GEO_COUNTRIES = ['IT']
	34	_GEO_BYPASS = False
	35
	36	def _extract_relinker_info(self, relinker_url, video_id, audio_only=False):
	37	if not re.match(r'https?://', relinker_url):
	38	return {'formats': [{'url': relinker_url}]}
	39
	40	formats = []
	41	geoprotection = None
	42	is_live = None
	43	duration = None
	44
	45	for platform in ('mon', 'flash', 'native'):
	46	relinker = self._download_xml(
	47	relinker_url, video_id,
	48	note='Downloading XML metadata for platform %s' % platform,
	49	transform_source=fix_xml_ampersands,
	50	query={'output': 45, 'pl': platform},
	51	headers=self.geo_verification_headers())
	52
	53	if not geoprotection:
	54	geoprotection = xpath_text(
	55	relinker, './geoprotection', default=None) == 'Y'
	56
	57	if not is_live:
	58	is_live = xpath_text(
	59	relinker, './is_live', default=None) == 'Y'
	60	if not duration:
	61	duration = parse_duration(xpath_text(
	62	relinker, './duration', default=None))
	63
	64	url_elem = find_xpath_attr(relinker, './url', 'type', 'content')
	65	if url_elem is None:
	66	continue
	67
	68	media_url = url_elem.text
	69
	70	# This does not imply geo restriction (e.g.
	71	# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
	72	if '/video_no_available.mp4' in media_url:
	73	continue
	74
	75	ext = determine_ext(media_url)
	76	if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
	77	continue
	78
	79	if ext == 'mp3':
	80	formats.append({
	81	'url': media_url,
	82	'vcodec': 'none',
	83	'acodec': 'mp3',
	84	'format_id': 'http-mp3',
	85	})
	86	break
	87	elif ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon':
	88	formats.extend(self._extract_m3u8_formats(
	89	media_url, video_id, 'mp4', 'm3u8_native',
	90	m3u8_id='hls', fatal=False))
	91	elif ext == 'f4m' or platform == 'flash':
	92	manifest_url = update_url_query(
	93	media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
	94	{'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
	95	formats.extend(self._extract_f4m_formats(
	96	manifest_url, video_id, f4m_id='hds', fatal=False))
	97	else:
	98	bitrate = int_or_none(xpath_text(relinker, 'bitrate'))
	99	formats.append({
	100	'url': media_url,
	101	'tbr': bitrate if bitrate > 0 else None,
	102	'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
	103	})
	104
	105	if not formats and geoprotection is True:
	106	self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
	107
	108	if not audio_only:
	109	formats.extend(self._create_http_urls(relinker_url, formats))
	110
	111	return filter_dict({
	112	'is_live': is_live,
	113	'duration': duration,
	114	'formats': formats,
	115	})
	116
	117	def _create_http_urls(self, relinker_url, fmts):
	118	_RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4\|/playlist\.m3u8).+?'
	119	_MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
	120	_QUALITY = {
	121	# tbr: w, h
	122	'250': [352, 198],
	123	'400': [512, 288],
	124	'700': [512, 288],
	125	'800': [700, 394],
	126	'1200': [736, 414],
	127	'1800': [1024, 576],
	128	'2400': [1280, 720],
	129	'3200': [1440, 810],
	130	'3600': [1440, 810],
	131	'5000': [1920, 1080],
	132	'10000': [1920, 1080],
	133	}
	134
	135	def test_url(url):
	136	resp = self._request_webpage(
	137	HEADRequest(url), None, headers={'User-Agent': 'Rai'},
	138	fatal=False, errnote=False, note=False)
	139
	140	if resp is False:
	141	return False
	142
	143	if resp.code == 200:
	144	return False if resp.url == url else resp.url
	145	return None
	146
	147	# filter out audio-only formats
	148	fmts = [f for f in fmts if not f.get('vcodec') == 'none']
	149
	150	def get_format_info(tbr):
	151	import math
	152	br = int_or_none(tbr)
	153	if len(fmts) == 1 and not br:
	154	br = fmts[0].get('tbr')
	155	if br > 300:
	156	tbr = compat_str(math.floor(br / 100) * 100)
	157	else:
	158	tbr = '250'
	159
	160	# try extracting info from available m3u8 formats
	161	format_copy = None
	162	for f in fmts:
	163	if f.get('tbr'):
	164	br_limit = math.floor(br / 100)
	165	if br_limit - 1 <= math.floor(f['tbr'] / 100) <= br_limit + 1:
	166	format_copy = f.copy()
	167	return {
	168	'width': format_copy.get('width'),
	169	'height': format_copy.get('height'),
	170	'tbr': format_copy.get('tbr'),
	171	'vcodec': format_copy.get('vcodec'),
	172	'acodec': format_copy.get('acodec'),
	173	'fps': format_copy.get('fps'),
	174	'format_id': 'https-%s' % tbr,
	175	} if format_copy else {
	176	'width': _QUALITY[tbr][0],
	177	'height': _QUALITY[tbr][1],
	178	'format_id': 'https-%s' % tbr,
	179	'tbr': int(tbr),
	180	}
	181
	182	loc = test_url(_MP4_TMPL % (relinker_url, '*'))
	183	if not isinstance(loc, compat_str):
	184	return []
	185
	186	mobj = re.match(
	187	_RELINKER_REG,
	188	test_url(relinker_url) or '')
	189	if not mobj:
	190	return []
	191
	192	available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*']
	193	available_qualities = [i for i in available_qualities if i]
	194
	195	formats = []
	196	for q in available_qualities:
	197	fmt = {
	198	'url': _MP4_TMPL % (relinker_url, q),
	199	'protocol': 'https',
	200	'ext': 'mp4',
	201	}
	202	fmt.update(get_format_info(q))
	203	formats.append(fmt)
	204	return formats
	205
	206	@staticmethod
	207	def _extract_subtitles(url, video_data):
	208	STL_EXT = 'stl'
	209	SRT_EXT = 'srt'
	210	subtitles = {}
	211	subtitles_array = video_data.get('subtitlesArray') or []
	212	for k in ('subtitles', 'subtitlesUrl'):
	213	subtitles_array.append({'url': video_data.get(k)})
	214	for subtitle in subtitles_array:
	215	sub_url = subtitle.get('url')
	216	if sub_url and isinstance(sub_url, compat_str):
	217	sub_lang = subtitle.get('language') or 'it'
	218	sub_url = urljoin(url, sub_url)
	219	sub_ext = determine_ext(sub_url, SRT_EXT)
	220	subtitles.setdefault(sub_lang, []).append({
	221	'ext': sub_ext,
	222	'url': sub_url,
	223	})
	224	if STL_EXT == sub_ext:
	225	subtitles[sub_lang].append({
	226	'ext': SRT_EXT,
	227	'url': sub_url[:-len(STL_EXT)] + SRT_EXT,
	228	})
	229	return subtitles
	230
	231
	232	class RaiPlayIE(RaiBaseIE):
	233	_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html\|json)' % RaiBaseIE._UUID_RE
	234	_TESTS = [{
	235	'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
	236	'md5': '8970abf8caf8aef4696e7b1f2adfc696',
	237	'info_dict': {
	238	'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
	239	'ext': 'mp4',
	240	'title': 'Report del 07/04/2014',
	241	'alt_title': 'St 2013/14 - Report - Espresso nel caffè - 07/04/2014',
	242	'description': 'md5:d730c168a58f4bb35600fc2f881ec04e',
	243	'thumbnail': r're:^https?://.*\.jpg$',
	244	'uploader': 'Rai Gulp',
	245	'duration': 6160,
	246	'series': 'Report',
	247	'season': '2013/14',
	248	'subtitles': {
	249	'it': 'count:4',
	250	},
	251	},
	252	'params': {
	253	'skip_download': True,
	254	},
	255	}, {
	256	# 1080p direct mp4 url
	257	'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html',
	258	'md5': 'aeda7243115380b2dd5e881fd42d949a',
	259	'info_dict': {
	260	'id': 'b1255a4a-8e72-4a2f-b9f3-fc1308e00736',
	261	'ext': 'mp4',
	262	'title': 'Blanca - S1E1 - Senza occhi',
	263	'alt_title': 'St 1 Ep 1 - Blanca - Senza occhi',
	264	'description': 'md5:75f95d5c030ec8bac263b1212322e28c',
	265	'thumbnail': r're:^https?://.*\.jpg$',
	266	'uploader': 'Rai 1',
	267	'duration': 6493,
	268	'series': 'Blanca',
	269	'season': 'Season 1',
	270	},
	271	}, {
	272	'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
	273	'only_matching': True,
	274	}, {
	275	# subtitles at 'subtitlesArray' key (see #27698)
	276	'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
	277	'only_matching': True,
	278	}, {
	279	# DRM protected
	280	'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
	281	'only_matching': True,
	282	}]
	283
	284	def _real_extract(self, url):
	285	base, video_id = self._match_valid_url(url).groups()
	286
	287	media = self._download_json(
	288	base + '.json', video_id, 'Downloading video JSON')
	289
	290	if not self.get_param('allow_unplayable_formats'):
	291	if try_get(
	292	media,
	293	(lambda x: x['rights_management']['rights']['drm'],
	294	lambda x: x['program_info']['rights_management']['rights']['drm']),
	295	dict):
	296	self.report_drm(video_id)
	297
	298	title = media['name']
	299	video = media['video']
	300
	301	relinker_info = self._extract_relinker_info(video['content_url'], video_id)
	302	self._sort_formats(relinker_info['formats'])
	303
	304	thumbnails = []
	305	for _, value in media.get('images', {}).items():
	306	if value:
	307	thumbnails.append({
	308	'url': urljoin(url, value),
	309	})
	310
	311	date_published = media.get('date_published')
	312	time_published = media.get('time_published')
	313	if date_published and time_published:
	314	date_published += ' ' + time_published
	315
	316	subtitles = self._extract_subtitles(url, video)
	317
	318	program_info = media.get('program_info') or {}
	319	season = media.get('season')
	320
	321	alt_title = join_nonempty(media.get('subtitle'), media.get('toptitle'), delim=' - ')
	322
	323	info = {
	324	'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
	325	'display_id': video_id,
	326	'title': title,
	327	'alt_title': strip_or_none(alt_title),
	328	'description': media.get('description'),
	329	'uploader': strip_or_none(media.get('channel')),
	330	'creator': strip_or_none(media.get('editor') or None),
	331	'duration': parse_duration(video.get('duration')),
	332	'timestamp': unified_timestamp(date_published),
	333	'thumbnails': thumbnails,
	334	'series': program_info.get('name'),
	335	'season_number': int_or_none(season),
	336	'season': season if (season and not season.isdigit()) else None,
	337	'episode': media.get('episode_title'),
	338	'episode_number': int_or_none(media.get('episode')),
	339	'subtitles': subtitles,
	340	'release_year': traverse_obj(media, ('track_info', 'edit_year')),
	341	}
	342
	343	info.update(relinker_info)
	344	return info
	345
	346
	347	class RaiPlayLiveIE(RaiPlayIE):
	348	_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
	349	_TESTS = [{
	350	'url': 'http://www.raiplay.it/dirette/rainews24',
	351	'info_dict': {
	352	'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
	353	'display_id': 'rainews24',
	354	'ext': 'mp4',
	355	'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
	356	'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497',
	357	'uploader': 'Rai News 24',
	358	'creator': 'Rai News 24',
	359	'is_live': True,
	360	},
	361	'params': {
	362	'skip_download': True,
	363	},
	364	}]
	365
	366
	367	class RaiPlayPlaylistIE(InfoExtractor):
	368	_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?'
	369	_TESTS = [{
	370	'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/',
	371	'info_dict': {
	372	'id': 'nondirloalmiocapo',
	373	'title': 'Non dirlo al mio capo',
	374	'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
	375	},
	376	'playlist_mincount': 12,
	377	}, {
	378	'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/episodi/stagione-2/',
	379	'info_dict': {
	380	'id': 'nondirloalmiocapo',
	381	'title': 'Non dirlo al mio capo - Stagione 2',
	382	'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
	383	},
	384	'playlist_mincount': 12,
	385	}]
	386
	387	def _real_extract(self, url):
	388	base, playlist_id, extra_id = self._match_valid_url(url).groups()
	389
	390	program = self._download_json(
	391	base + '.json', playlist_id, 'Downloading program JSON')
	392
	393	if extra_id:
	394	extra_id = extra_id.upper().rstrip('/')
	395
	396	playlist_title = program.get('name')
	397	entries = []
	398	for b in (program.get('blocks') or []):
	399	for s in (b.get('sets') or []):
	400	if extra_id:
	401	if extra_id != join_nonempty(
	402	b.get('name'), s.get('name'), delim='/').replace(' ', '-').upper():
	403	continue
	404	playlist_title = join_nonempty(playlist_title, s.get('name'), delim=' - ')
	405
	406	s_id = s.get('id')
	407	if not s_id:
	408	continue
	409	medias = self._download_json(
	410	'%s/%s.json' % (base, s_id), s_id,
	411	'Downloading content set JSON', fatal=False)
	412	if not medias:
	413	continue
	414	for m in (medias.get('items') or []):
	415	path_id = m.get('path_id')
	416	if not path_id:
	417	continue
	418	video_url = urljoin(url, path_id)
	419	entries.append(self.url_result(
	420	video_url, ie=RaiPlayIE.ie_key(),
	421	video_id=RaiPlayIE._match_id(video_url)))
	422
	423	return self.playlist_result(
	424	entries, playlist_id, playlist_title,
	425	try_get(program, lambda x: x['program_info']['description']))
	426
	427
	428	class RaiPlaySoundIE(RaiBaseIE):
	429	_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/.+?-(?P<id>%s))\.(?:html\|json)' % RaiBaseIE._UUID_RE
	430	_TESTS = [{
	431	'url': 'https://www.raiplaysound.it/audio/2021/12/IL-RUGGITO-DEL-CONIGLIO-1ebae2a7-7cdb-42bb-842e-fe0d193e9707.html',
	432	'md5': '8970abf8caf8aef4696e7b1f2adfc696',
	433	'info_dict': {
	434	'id': '1ebae2a7-7cdb-42bb-842e-fe0d193e9707',
	435	'ext': 'mp3',
	436	'title': 'Il Ruggito del Coniglio del 10/12/2021',
	437	'description': 'md5:2a17d2107e59a4a8faa0e18334139ee2',
	438	'thumbnail': r're:^https?://.*\.jpg$',
	439	'uploader': 'rai radio 2',
	440	'duration': 5685,
	441	'series': 'Il Ruggito del Coniglio',
	442	},
	443	'params': {
	444	'skip_download': True,
	445	},
	446	}]
	447
	448	def _real_extract(self, url):
	449	base, audio_id = self._match_valid_url(url).group('base', 'id')
	450	media = self._download_json(f'{base}.json', audio_id, 'Downloading audio JSON')
	451	uid = try_get(media, lambda x: remove_start(remove_start(x['uniquename'], 'ContentItem-'), 'Page-'))
	452
	453	info = {}
	454	formats = []
	455	relinkers = set(traverse_obj(media, (('downloadable_audio', 'audio', ('live', 'cards', 0, 'audio')), 'url')))
	456	for r in relinkers:
	457	info = self._extract_relinker_info(r, audio_id, True)
	458	formats.extend(info.get('formats'))
	459
	460	date_published = try_get(media, (lambda x: f'{x["create_date"]} {x.get("create_time") or ""}',
	461	lambda x: x['live']['create_date']))
	462
	463	podcast_info = traverse_obj(media, 'podcast_info', ('live', 'cards', 0)) or {}
	464	thumbnails = [{
	465	'url': urljoin(url, thumb_url),
	466	} for thumb_url in (podcast_info.get('images') or {}).values() if thumb_url]
	467
	468	return {
	469	**info,
	470	'id': uid or audio_id,
	471	'display_id': audio_id,
	472	'title': traverse_obj(media, 'title', 'episode_title'),
	473	'alt_title': traverse_obj(media, ('track_info', 'media_name')),
	474	'description': media.get('description'),
	475	'uploader': traverse_obj(media, ('track_info', 'channel'), expected_type=strip_or_none),
	476	'creator': traverse_obj(media, ('track_info', 'editor'), expected_type=strip_or_none),
	477	'timestamp': unified_timestamp(date_published),
	478	'thumbnails': thumbnails,
	479	'series': podcast_info.get('title'),
	480	'season_number': int_or_none(media.get('season')),
	481	'episode': media.get('episode_title'),
	482	'episode_number': int_or_none(media.get('episode')),
	483	'formats': formats,
	484	}
	485
	486
	487	class RaiPlaySoundLiveIE(RaiPlaySoundIE):
	488	_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?P<id>[^/?#&]+)$)'
	489	_TESTS = [{
	490	'url': 'https://www.raiplaysound.it/radio2',
	491	'info_dict': {
	492	'id': 'b00a50e6-f404-4af6-8f8c-ff3b9af73a44',
	493	'display_id': 'radio2',
	494	'ext': 'mp4',
	495	'title': 'Rai Radio 2',
	496	'uploader': 'rai radio 2',
	497	'creator': 'raiplaysound',
	498	'is_live': True,
	499	},
	500	'params': {
	501	'skip_download': 'live',
	502	},
	503	}]
	504
	505
	506	class RaiPlaySoundPlaylistIE(InfoExtractor):
	507	_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?:programmi\|playlist\|audiolibri)/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?'
	508	_TESTS = [{
	509	'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio',
	510	'info_dict': {
	511	'id': 'ilruggitodelconiglio',
	512	'title': 'Il Ruggito del Coniglio',
	513	'description': 'md5:1bbaf631245a7ab1ec4d9fbb3c7aa8f3',
	514	},
	515	'playlist_mincount': 65,
	516	}, {
	517	'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio/puntate/prima-stagione-1995',
	518	'info_dict': {
	519	'id': 'ilruggitodelconiglio_puntate_prima-stagione-1995',
	520	'title': 'Prima Stagione 1995',
	521	},
	522	'playlist_count': 1,
	523	}]
	524
	525	def _real_extract(self, url):
	526	base, playlist_id, extra_id = self._match_valid_url(url).group('base', 'id', 'extra_id')
	527	url = f'{base}.json'
	528	program = self._download_json(url, playlist_id, 'Downloading program JSON')
	529
	530	if extra_id:
	531	extra_id = extra_id.rstrip('/')
	532	playlist_id += '_' + extra_id.replace('/', '_')
	533	path = next(c['path_id'] for c in program.get('filters') or [] if extra_id in c.get('weblink'))
	534	program = self._download_json(
	535	urljoin('https://www.raiplaysound.it', path), playlist_id, 'Downloading program secondary JSON')
	536
	537	entries = [
	538	self.url_result(urljoin(base, c['path_id']), ie=RaiPlaySoundIE.ie_key())
	539	for c in traverse_obj(program, 'cards', ('block', 'cards')) or []
	540	if c.get('path_id')]
	541
	542	return self.playlist_result(entries, playlist_id, program.get('title'),
	543	traverse_obj(program, ('podcast_info', 'description')))
	544
	545
	546	class RaiIE(RaiBaseIE):
	547	_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it\|tv)\|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
	548	_TESTS = [{
	549	# var uniquename = "ContentItem-..."
	550	# data-id="ContentItem-..."
	551	'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
	552	'info_dict': {
	553	'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
	554	'ext': 'mp4',
	555	'title': 'TG PRIMO TEMPO',
	556	'thumbnail': r're:^https?://.*\.jpg$',
	557	'duration': 1758,
	558	'upload_date': '20140612',
	559	},
	560	'skip': 'This content is available only in Italy',
	561	}, {
	562	# with ContentItem in many metas
	563	'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
	564	'info_dict': {
	565	'id': '1632c009-c843-4836-bb65-80c33084a64b',
	566	'ext': 'mp4',
	567	'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"',
	568	'description': 'I film in uscita questa settimana.',
	569	'thumbnail': r're:^https?://.*\.png$',
	570	'duration': 833,
	571	'upload_date': '20161103',
	572	}
	573	}, {
	574	# with ContentItem in og:url
	575	'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
	576	'md5': '06345bd97c932f19ffb129973d07a020',
	577	'info_dict': {
	578	'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
	579	'ext': 'mp4',
	580	'title': 'TG1 ore 20:00 del 03/11/2016',
	581	'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016',
	582	'thumbnail': r're:^https?://.*\.jpg$',
	583	'duration': 2214,
	584	'upload_date': '20161103',
	585	}
	586	}, {
	587	# initEdizione('ContentItem-...'
	588	'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
	589	'info_dict': {
	590	'id': 'c2187016-8484-4e3a-8ac8-35e475b07303',
	591	'ext': 'mp4',
	592	'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}',
	593	'duration': 2274,
	594	'upload_date': '20170401',
	595	},
	596	'skip': 'Changes daily',
	597	}, {
	598	# HLS live stream with ContentItem in og:url
	599	'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
	600	'info_dict': {
	601	'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
	602	'ext': 'mp4',
	603	'title': 'La diretta di Rainews24',
	604	},
	605	'params': {
	606	'skip_download': True,
	607	},
	608	}, {
	609	# Direct MMS URL
	610	'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
	611	'only_matching': True,
	612	}, {
	613	'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html',
	614	'only_matching': True,
	615	}]
	616
	617	def _extract_from_content_id(self, content_id, url):
	618	media = self._download_json(
	619	'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id,
	620	content_id, 'Downloading video JSON')
	621
	622	title = media['name'].strip()
	623
	624	media_type = media['type']
	625	if 'Audio' in media_type:
	626	relinker_info = {
	627	'formats': [{
	628	'format_id': media.get('formatoAudio'),
	629	'url': media['audioUrl'],
	630	'ext': media.get('formatoAudio'),
	631	}]
	632	}
	633	elif 'Video' in media_type:
	634	relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
	635	else:
	636	raise ExtractorError('not a media file')
	637
	638	self._sort_formats(relinker_info['formats'])
	639
	640	thumbnails = []
	641	for image_type in ('image', 'image_medium', 'image_300'):
	642	thumbnail_url = media.get(image_type)
	643	if thumbnail_url:
	644	thumbnails.append({
	645	'url': compat_urlparse.urljoin(url, thumbnail_url),
	646	})
	647
	648	subtitles = self._extract_subtitles(url, media)
	649
	650	info = {
	651	'id': content_id,
	652	'title': title,
	653	'description': strip_or_none(media.get('desc')),
	654	'thumbnails': thumbnails,
	655	'uploader': media.get('author'),
	656	'upload_date': unified_strdate(media.get('date')),
	657	'duration': parse_duration(media.get('length')),
	658	'subtitles': subtitles,
	659	}
	660
	661	info.update(relinker_info)
	662
	663	return info
	664
	665	def _real_extract(self, url):
	666	video_id = self._match_id(url)
	667
	668	webpage = self._download_webpage(url, video_id)
	669
	670	content_item_id = None
	671
	672	content_item_url = self._html_search_meta(
	673	('og:url', 'og:video', 'og:video:secure_url', 'twitter:url',
	674	'twitter:player', 'jsonlink'), webpage, default=None)
	675	if content_item_url:
	676	content_item_id = self._search_regex(
	677	r'ContentItem-(%s)' % self._UUID_RE, content_item_url,
	678	'content item id', default=None)
	679
	680	if not content_item_id:
	681	content_item_id = self._search_regex(
	682	r'''(?x)
	683	(?:
	684	(?:initEdizione\|drawMediaRaiTV)\(\|
	685	<(?:[^>]+\bdata-id\|var\s+uniquename)=\|
	686	<iframe[^>]+\bsrc=
	687	)
	688	(["\'])
	689	(?:(?!\1).)*\bContentItem-(?P<id>%s)
	690	''' % self._UUID_RE,
	691	webpage, 'content item id', default=None, group='id')
	692
	693	content_item_ids = set()
	694	if content_item_id:
	695	content_item_ids.add(content_item_id)
	696	if video_id not in content_item_ids:
	697	content_item_ids.add(video_id)
	698
	699	for content_item_id in content_item_ids:
	700	try:
	701	return self._extract_from_content_id(content_item_id, url)
	702	except GeoRestrictedError:
	703	raise
	704	except ExtractorError:
	705	pass
	706
	707	relinker_url = self._proto_relative_url(self._search_regex(
	708	r'''(?x)
	709	(?:
	710	var\s+videoURL\|
	711	mediaInfo\.mediaUri
	712	)\s=\s
	713	([\'"])
	714	(?P<url>
	715	(?:https?:)?
	716	//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
	717	(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
	718	''',
	719	webpage, 'relinker URL', group='url'))
	720
	721	relinker_info = self._extract_relinker_info(
	722	urljoin(url, relinker_url), video_id)
	723	self._sort_formats(relinker_info['formats'])
	724
	725	title = self._search_regex(
	726	r'var\s+videoTitolo\s=\s([\'"])(?P<title>[^\'"]+)\1',
	727	webpage, 'title', group='title',
	728	default=None) or self._og_search_title(webpage)
	729
	730	info = {
	731	'id': video_id,
	732	'title': title,
	733	}
	734
	735	info.update(relinker_info)
	736
	737	return info