jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import re
	2
	3	from .common import InfoExtractor
	4	from ..compat import compat_HTTPError
	5	from ..utils import (
	6	determine_ext,
	7	float_or_none,
	8	int_or_none,
	9	smuggle_url,
	10	try_get,
	11	unsmuggle_url,
	12	ExtractorError,
	13	)
	14
	15
	16	class LimelightBaseIE(InfoExtractor):
	17	_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
	18
	19	@classmethod
	20	def _extract_urls(cls, webpage, source_url):
	21	lm = {
	22	'Media': 'media',
	23	'Channel': 'channel',
	24	'ChannelList': 'channel_list',
	25	}
	26
	27	def smuggle(url):
	28	return smuggle_url(url, {'source_url': source_url})
	29
	30	entries = []
	31	for kind, video_id in re.findall(
	32	r'LimelightPlayer\.doLoad(Media\|Channel\|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
	33	webpage):
	34	entries.append(cls.url_result(
	35	smuggle('limelight:%s:%s' % (lm[kind], video_id)),
	36	'Limelight%s' % kind, video_id))
	37	for mobj in re.finditer(
	38	# As per [1] class attribute should be exactly equal to
	39	# LimelightEmbeddedPlayerFlash but numerous examples seen
	40	# that don't exactly match it (e.g. [2]).
	41	# 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
	42	# 2. http://www.sedona.com/FacilitatorTraining2017
	43	r'''(?sx)
	44	<object[^>]+class=(["\'])(?:(?!\1).)\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)\1[^>]>.?
	45	<param[^>]+
	46	name=(["\'])flashVars\2[^>]+
	47	value=(["\'])(?:(?!\3).)*(?P<kind>media\|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
	48	''', webpage):
	49	kind, video_id = mobj.group('kind'), mobj.group('id')
	50	entries.append(cls.url_result(
	51	smuggle('limelight:%s:%s' % (kind, video_id)),
	52	'Limelight%s' % kind.capitalize(), video_id))
	53	# http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page)
	54	for video_id in re.findall(
	55	r'(?s)LimelightPlayerUtil\.embed\s\(\s{.?\bmediaId["\']\s:\s*["\'](?P<id>[a-z0-9]{32})',
	56	webpage):
	57	entries.append(cls.url_result(
	58	smuggle('limelight:media:%s' % video_id),
	59	LimelightMediaIE.ie_key(), video_id))
	60	return entries
	61
	62	def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
	63	headers = {}
	64	if referer:
	65	headers['Referer'] = referer
	66	try:
	67	return self._download_json(
	68	self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
	69	item_id, 'Downloading PlaylistService %s JSON' % method,
	70	fatal=fatal, headers=headers)
	71	except ExtractorError as e:
	72	if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
	73	error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
	74	if error == 'CountryDisabled':
	75	self.raise_geo_restricted()
	76	raise ExtractorError(error, expected=True)
	77	raise
	78
	79	def _extract(self, item_id, pc_method, mobile_method, referer=None):
	80	pc = self._call_playlist_service(item_id, pc_method, referer=referer)
	81	mobile = self._call_playlist_service(
	82	item_id, mobile_method, fatal=False, referer=referer)
	83	return pc, mobile
	84
	85	def _extract_info(self, pc, mobile, i, referer):
	86	get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {}
	87	pc_item = get_item(pc, 'playlistItems')
	88	mobile_item = get_item(mobile, 'mediaList')
	89	video_id = pc_item.get('mediaId') or mobile_item['mediaId']
	90	title = pc_item.get('title') or mobile_item['title']
	91
	92	formats = []
	93	urls = []
	94	for stream in pc_item.get('streams', []):
	95	stream_url = stream.get('url')
	96	if not stream_url or stream_url in urls:
	97	continue
	98	if not self.get_param('allow_unplayable_formats') and stream.get('drmProtected'):
	99	continue
	100	urls.append(stream_url)
	101	ext = determine_ext(stream_url)
	102	if ext == 'f4m':
	103	formats.extend(self._extract_f4m_formats(
	104	stream_url, video_id, f4m_id='hds', fatal=False))
	105	else:
	106	fmt = {
	107	'url': stream_url,
	108	'abr': float_or_none(stream.get('audioBitRate')),
	109	'fps': float_or_none(stream.get('videoFrameRate')),
	110	'ext': ext,
	111	}
	112	width = int_or_none(stream.get('videoWidthInPixels'))
	113	height = int_or_none(stream.get('videoHeightInPixels'))
	114	vbr = float_or_none(stream.get('videoBitRate'))
	115	if width or height or vbr:
	116	fmt.update({
	117	'width': width,
	118	'height': height,
	119	'vbr': vbr,
	120	})
	121	else:
	122	fmt['vcodec'] = 'none'
	123	rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
	124	if rtmp:
	125	format_id = 'rtmp'
	126	if stream.get('videoBitRate'):
	127	format_id += '-%d' % int_or_none(stream['videoBitRate'])
	128	http_format_id = format_id.replace('rtmp', 'http')
	129
	130	CDN_HOSTS = (
	131	('delvenetworks.com', 'cpl.delvenetworks.com'),
	132	('video.llnw.net', 's2.content.video.llnw.net'),
	133	)
	134	for cdn_host, http_host in CDN_HOSTS:
	135	if cdn_host not in rtmp.group('host').lower():
	136	continue
	137	http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
	138	urls.append(http_url)
	139	if self._is_valid_url(http_url, video_id, http_format_id):
	140	http_fmt = fmt.copy()
	141	http_fmt.update({
	142	'url': http_url,
	143	'format_id': http_format_id,
	144	})
	145	formats.append(http_fmt)
	146	break
	147
	148	fmt.update({
	149	'url': rtmp.group('url'),
	150	'play_path': rtmp.group('playpath'),
	151	'app': rtmp.group('app'),
	152	'ext': 'flv',
	153	'format_id': format_id,
	154	})
	155	formats.append(fmt)
	156
	157	for mobile_url in mobile_item.get('mobileUrls', []):
	158	media_url = mobile_url.get('mobileUrl')
	159	format_id = mobile_url.get('targetMediaPlatform')
	160	if not media_url or media_url in urls:
	161	continue
	162	if (format_id in ('Widevine', 'SmoothStreaming')
	163	and not self.get_param('allow_unplayable_formats', False)):
	164	continue
	165	urls.append(media_url)
	166	ext = determine_ext(media_url)
	167	if ext == 'm3u8':
	168	formats.extend(self._extract_m3u8_formats(
	169	media_url, video_id, 'mp4', 'm3u8_native',
	170	m3u8_id=format_id, fatal=False))
	171	elif ext == 'f4m':
	172	formats.extend(self._extract_f4m_formats(
	173	stream_url, video_id, f4m_id=format_id, fatal=False))
	174	else:
	175	formats.append({
	176	'url': media_url,
	177	'format_id': format_id,
	178	'quality': -10,
	179	'ext': ext,
	180	})
	181
	182	self._sort_formats(formats)
	183
	184	subtitles = {}
	185	for flag in mobile_item.get('flags'):
	186	if flag == 'ClosedCaptions':
	187	closed_captions = self._call_playlist_service(
	188	video_id, 'getClosedCaptionsDetailsByMediaId',
	189	False, referer) or []
	190	for cc in closed_captions:
	191	cc_url = cc.get('webvttFileUrl')
	192	if not cc_url:
	193	continue
	194	lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en')
	195	subtitles.setdefault(lang, []).append({
	196	'url': cc_url,
	197	})
	198	break
	199
	200	get_meta = lambda x: pc_item.get(x) or mobile_item.get(x)
	201
	202	return {
	203	'id': video_id,
	204	'title': title,
	205	'description': get_meta('description'),
	206	'formats': formats,
	207	'duration': float_or_none(get_meta('durationInMilliseconds'), 1000),
	208	'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'),
	209	'subtitles': subtitles,
	210	}
	211
	212
	213	class LimelightMediaIE(LimelightBaseIE):
	214	IE_NAME = 'limelight'
	215	_VALID_URL = r'''(?x)
	216	(?:
	217	limelight:media:\|
	218	https?://
	219	(?:
	220	link\.videoplatform\.limelight\.com/media/\|
	221	assets\.delvenetworks\.com/player/loader\.swf
	222	)
	223	\?.*?\bmediaId=
	224	)
	225	(?P<id>[a-z0-9]{32})
	226	'''
	227	_TESTS = [{
	228	'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
	229	'info_dict': {
	230	'id': '3ffd040b522b4485b6d84effc750cd86',
	231	'ext': 'mp4',
	232	'title': 'HaP and the HB Prince Trailer',
	233	'description': 'md5:8005b944181778e313d95c1237ddb640',
	234	'thumbnail': r're:^https?://.*\.jpeg$',
	235	'duration': 144.23,
	236	},
	237	'params': {
	238	# m3u8 download
	239	'skip_download': True,
	240	},
	241	}, {
	242	# video with subtitles
	243	'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
	244	'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
	245	'info_dict': {
	246	'id': 'a3e00274d4564ec4a9b29b9466432335',
	247	'ext': 'mp4',
	248	'title': '3Play Media Overview Video',
	249	'thumbnail': r're:^https?://.*\.jpeg$',
	250	'duration': 78.101,
	251	# TODO: extract all languages that were accessible via API
	252	# 'subtitles': 'mincount:9',
	253	'subtitles': 'mincount:1',
	254	},
	255	}, {
	256	'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
	257	'only_matching': True,
	258	}]
	259	_PLAYLIST_SERVICE_PATH = 'media'
	260
	261	def _real_extract(self, url):
	262	url, smuggled_data = unsmuggle_url(url, {})
	263	video_id = self._match_id(url)
	264	source_url = smuggled_data.get('source_url')
	265	self._initialize_geo_bypass({
	266	'countries': smuggled_data.get('geo_countries'),
	267	})
	268
	269	pc, mobile = self._extract(
	270	video_id, 'getPlaylistByMediaId',
	271	'getMobilePlaylistByMediaId', source_url)
	272
	273	return self._extract_info(pc, mobile, 0, source_url)
	274
	275
	276	class LimelightChannelIE(LimelightBaseIE):
	277	IE_NAME = 'limelight:channel'
	278	_VALID_URL = r'''(?x)
	279	(?:
	280	limelight:channel:\|
	281	https?://
	282	(?:
	283	link\.videoplatform\.limelight\.com/media/\|
	284	assets\.delvenetworks\.com/player/loader\.swf
	285	)
	286	\?.*?\bchannelId=
	287	)
	288	(?P<id>[a-z0-9]{32})
	289	'''
	290	_TESTS = [{
	291	'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
	292	'info_dict': {
	293	'id': 'ab6a524c379342f9b23642917020c082',
	294	'title': 'Javascript Sample Code',
	295	'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html',
	296	},
	297	'playlist_mincount': 3,
	298	}, {
	299	'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
	300	'only_matching': True,
	301	}]
	302	_PLAYLIST_SERVICE_PATH = 'channel'
	303
	304	def _real_extract(self, url):
	305	url, smuggled_data = unsmuggle_url(url, {})
	306	channel_id = self._match_id(url)
	307	source_url = smuggled_data.get('source_url')
	308
	309	pc, mobile = self._extract(
	310	channel_id, 'getPlaylistByChannelId',
	311	'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
	312	source_url)
	313
	314	entries = [
	315	self._extract_info(pc, mobile, i, source_url)
	316	for i in range(len(pc['playlistItems']))]
	317
	318	return self.playlist_result(
	319	entries, channel_id, pc.get('title'), mobile.get('description'))
	320
	321
	322	class LimelightChannelListIE(LimelightBaseIE):
	323	IE_NAME = 'limelight:channel_list'
	324	_VALID_URL = r'''(?x)
	325	(?:
	326	limelight:channel_list:\|
	327	https?://
	328	(?:
	329	link\.videoplatform\.limelight\.com/media/\|
	330	assets\.delvenetworks\.com/player/loader\.swf
	331	)
	332	\?.*?\bchannelListId=
	333	)
	334	(?P<id>[a-z0-9]{32})
	335	'''
	336	_TESTS = [{
	337	'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
	338	'info_dict': {
	339	'id': '301b117890c4465c8179ede21fd92e2b',
	340	'title': 'Website - Hero Player',
	341	},
	342	'playlist_mincount': 2,
	343	}, {
	344	'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
	345	'only_matching': True,
	346	}]
	347	_PLAYLIST_SERVICE_PATH = 'channel_list'
	348
	349	def _real_extract(self, url):
	350	channel_list_id = self._match_id(url)
	351
	352	channel_list = self._call_playlist_service(
	353	channel_list_id, 'getMobileChannelListById')
	354
	355	entries = [
	356	self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
	357	for channel in channel_list['channelList']]
	358
	359	return self.playlist_result(
	360	entries, channel_list_id, channel_list['title'])