jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import itertools
	2	import re
	3	import urllib.parse
	4
	5	from .common import InfoExtractor
	6	from ..utils import (
	7	determine_ext,
	8	find_xpath_attr,
	9	float_or_none,
	10	int_or_none,
	11	orderedSet,
	12	parse_iso8601,
	13	traverse_obj,
	14	update_url_query,
	15	xpath_attr,
	16	xpath_text,
	17	xpath_with_ns,
	18	)
	19
	20
	21	class LivestreamIE(InfoExtractor):
	22	IE_NAME = 'livestream'
	23	_VALID_URL = r'''(?x)
	24	https?://(?:new\.)?livestream\.com/
	25	(?:accounts/(?P<account_id>\d+)\|(?P<account_name>[^/]+))
	26	(?:/events/(?P<event_id>\d+)\|/(?P<event_name>[^/]+))?
	27	(?:/videos/(?P<id>\d+))?
	28	'''
	29	_EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"']
	30
	31	_TESTS = [{
	32	'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
	33	'md5': '7876c5f5dc3e711b6b73acce4aac1527',
	34	'info_dict': {
	35	'id': '4719370',
	36	'ext': 'mp4',
	37	'title': 'Live from Webster Hall NYC',
	38	'timestamp': 1350008072,
	39	'upload_date': '20121012',
	40	'duration': 5968.0,
	41	'like_count': int,
	42	'view_count': int,
	43	'comment_count': int,
	44	'thumbnail': r're:^http://.*\.jpg$',
	45	},
	46	}, {
	47	'url': 'https://livestream.com/coheedandcambria/websterhall',
	48	'info_dict': {
	49	'id': '1585861',
	50	'title': 'Live From Webster Hall',
	51	},
	52	'playlist_mincount': 1,
	53	}, {
	54	'url': 'https://livestream.com/dayananda/events/7954027',
	55	'info_dict': {
	56	'title': 'Live from Mevo',
	57	'id': '7954027',
	58	},
	59	'playlist_mincount': 4,
	60	}, {
	61	'url': 'https://livestream.com/accounts/82',
	62	'info_dict': {
	63	'id': '253978',
	64	'view_count': int,
	65	'title': 'trsr',
	66	'comment_count': int,
	67	'like_count': int,
	68	'upload_date': '20120306',
	69	'timestamp': 1331042383,
	70	'thumbnail': 'http://img.new.livestream.com/videos/0000000000000372/cacbeed6-fb68-4b5e-ad9c-e148124e68a9_640x427.jpg',
	71	'duration': 15.332,
	72	'ext': 'mp4',
	73	},
	74	}, {
	75	'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
	76	'only_matching': True,
	77	}, {
	78	'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015',
	79	'only_matching': True,
	80	}]
	81	_API_URL_TEMPLATE = 'http://livestream.com/api/accounts/%s/events/%s'
	82
	83	def _parse_smil_formats_and_subtitles(
	84	self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
	85	base_ele = find_xpath_attr(
	86	smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase')
	87	base = base_ele.get('content') if base_ele is not None else 'http://livestreamvod-f.akamaihd.net/'
	88
	89	formats = []
	90	video_nodes = smil.findall(self._xpath_ns('.//video', namespace))
	91
	92	for vn in video_nodes:
	93	tbr = int_or_none(vn.attrib.get('system-bitrate'), 1000)
	94	furl = (
	95	update_url_query(urllib.parse.urljoin(base, vn.attrib['src']), {
	96	'v': '3.0.3',
	97	'fp': 'WIN% 14,0,0,145',
	98	}))
	99	if 'clipBegin' in vn.attrib:
	100	furl += '&ssek=' + vn.attrib['clipBegin']
	101	formats.append({
	102	'url': furl,
	103	'format_id': 'smil_%d' % tbr,
	104	'ext': 'flv',
	105	'tbr': tbr,
	106	'preference': -1000, # Strictly inferior than all other formats?
	107	})
	108	return formats, {}
	109
	110	def _extract_video_info(self, video_data):
	111	video_id = str(video_data['id'])
	112
	113	FORMAT_KEYS = (
	114	('sd', 'progressive_url'),
	115	('hd', 'progressive_url_hd'),
	116	)
	117
	118	formats = []
	119	for format_id, key in FORMAT_KEYS:
	120	video_url = video_data.get(key)
	121	if video_url:
	122	ext = determine_ext(video_url)
	123	if ext == 'm3u8':
	124	continue
	125	bitrate = int_or_none(self._search_regex(
	126	rf'(\d+)\.{ext}', video_url, 'bitrate', default=None))
	127	formats.append({
	128	'url': video_url,
	129	'format_id': format_id,
	130	'tbr': bitrate,
	131	'ext': ext,
	132	})
	133
	134	smil_url = video_data.get('smil_url')
	135	if smil_url:
	136	formats.extend(self._extract_smil_formats(smil_url, video_id, fatal=False))
	137
	138	m3u8_url = video_data.get('m3u8_url')
	139	if m3u8_url:
	140	formats.extend(self._extract_m3u8_formats(
	141	m3u8_url, video_id, 'mp4', 'm3u8_native',
	142	m3u8_id='hls', fatal=False))
	143
	144	f4m_url = video_data.get('f4m_url')
	145	if f4m_url:
	146	formats.extend(self._extract_f4m_formats(
	147	f4m_url, video_id, f4m_id='hds', fatal=False))
	148
	149	comments = [{
	150	'author_id': comment.get('author_id'),
	151	'author': comment.get('author', {}).get('full_name'),
	152	'id': comment.get('id'),
	153	'text': comment['text'],
	154	'timestamp': parse_iso8601(comment.get('created_at')),
	155	} for comment in video_data.get('comments', {}).get('data', [])]
	156
	157	return {
	158	'id': video_id,
	159	'formats': formats,
	160	'title': video_data['caption'],
	161	'description': video_data.get('description'),
	162	'thumbnail': video_data.get('thumbnail_url'),
	163	'duration': float_or_none(video_data.get('duration'), 1000),
	164	'timestamp': parse_iso8601(video_data.get('publish_at')),
	165	'like_count': video_data.get('likes', {}).get('total'),
	166	'comment_count': video_data.get('comments', {}).get('total'),
	167	'view_count': video_data.get('views'),
	168	'comments': comments,
	169	}
	170
	171	def _extract_stream_info(self, stream_info):
	172	broadcast_id = str(stream_info['broadcast_id'])
	173	is_live = stream_info.get('is_live')
	174
	175	formats = []
	176	smil_url = stream_info.get('play_url')
	177	if smil_url:
	178	formats.extend(self._extract_smil_formats(smil_url, broadcast_id))
	179
	180	m3u8_url = stream_info.get('m3u8_url')
	181	if m3u8_url:
	182	formats.extend(self._extract_m3u8_formats(
	183	m3u8_url, broadcast_id, 'mp4', 'm3u8_native',
	184	m3u8_id='hls', fatal=False))
	185
	186	rtsp_url = stream_info.get('rtsp_url')
	187	if rtsp_url:
	188	formats.append({
	189	'url': rtsp_url,
	190	'format_id': 'rtsp',
	191	})
	192
	193	return {
	194	'id': broadcast_id,
	195	'formats': formats,
	196	'title': stream_info['stream_title'],
	197	'thumbnail': stream_info.get('thumbnail_url'),
	198	'is_live': is_live,
	199	}
	200
	201	def _generate_event_playlist(self, event_data):
	202	event_id = str(event_data['id'])
	203	account_id = str(event_data['owner_account_id'])
	204	feed_root_url = self._API_URL_TEMPLATE % (account_id, event_id) + '/feed.json'
	205
	206	stream_info = event_data.get('stream_info')
	207	if stream_info:
	208	return self._extract_stream_info(stream_info)
	209
	210	last_video = None
	211	for i in itertools.count(1):
	212	if last_video is None:
	213	info_url = feed_root_url
	214	else:
	215	info_url = f'{feed_root_url}?&id={last_video}&newer=-1&type=video'
	216	videos_info = self._download_json(
	217	info_url, event_id, f'Downloading page {i}')['data']
	218	videos_info = [v['data'] for v in videos_info if v['type'] == 'video']
	219	if not videos_info:
	220	break
	221	for v in videos_info:
	222	v_id = str(v['id'])
	223	yield self.url_result(
	224	f'http://livestream.com/accounts/{account_id}/events/{event_id}/videos/{v_id}',
	225	LivestreamIE, v_id, v.get('caption'))
	226	last_video = videos_info[-1]['id']
	227
	228	def _real_extract(self, url):
	229	mobj = self._match_valid_url(url)
	230	video_id = mobj.group('id')
	231	event = mobj.group('event_id') or mobj.group('event_name')
	232	account = mobj.group('account_id') or mobj.group('account_name')
	233	api_url = f'http://livestream.com/api/accounts/{account}'
	234
	235	if video_id:
	236	video_data = self._download_json(
	237	f'{api_url}/events/{event}/videos/{video_id}', video_id)
	238	return self._extract_video_info(video_data)
	239	elif event:
	240	event_data = self._download_json(f'{api_url}/events/{event}', None)
	241	return self.playlist_result(
	242	self._generate_event_playlist(event_data), str(event_data['id']), event_data['full_name'])
	243
	244	account_data = self._download_json(api_url, None)
	245	items = traverse_obj(account_data, (('upcoming_events', 'past_events'), 'data', ...))
	246	return self.playlist_result(
	247	itertools.chain.from_iterable(map(self._generate_event_playlist, items)),
	248	account_data.get('id'), account_data.get('full_name'))
	249
	250
	251	# The original version of Livestream uses a different system
	252	class LivestreamOriginalIE(InfoExtractor):
	253	IE_NAME = 'livestream:original'
	254	_VALID_URL = r'''(?x)https?://original\.livestream\.com/
	255	(?P<user>[^/\?#]+)(?:/(?P<type>video\|folder)
	256	(?:(?:\?.?Id=\|/)(?P<id>.?)(&\|$))?)?
	257	'''
	258	_TESTS = [{
	259	'url': 'http://original.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
	260	'info_dict': {
	261	'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
	262	'ext': 'mp4',
	263	'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
	264	'duration': 771.301,
	265	'view_count': int,
	266	},
	267	}, {
	268	'url': 'https://original.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
	269	'info_dict': {
	270	'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
	271	},
	272	'playlist_mincount': 4,
	273	}, {
	274	# live stream
	275	'url': 'http://original.livestream.com/znsbahamas',
	276	'only_matching': True,
	277	}]
	278
	279	def _extract_video_info(self, user, video_id):
	280	api_url = f'http://x{user}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={video_id}'
	281	info = self._download_xml(api_url, video_id)
	282
	283	item = info.find('channel').find('item')
	284	title = xpath_text(item, 'title')
	285	media_ns = {'media': 'http://search.yahoo.com/mrss'}
	286	thumbnail_url = xpath_attr(
	287	item, xpath_with_ns('media:thumbnail', media_ns), 'url')
	288	duration = float_or_none(xpath_attr(
	289	item, xpath_with_ns('media:content', media_ns), 'duration'))
	290	ls_ns = {'ls': 'http://api.channel.livestream.com/2.0'}
	291	view_count = int_or_none(xpath_text(
	292	item, xpath_with_ns('ls:viewsCount', ls_ns)))
	293
	294	return {
	295	'id': video_id,
	296	'title': title,
	297	'thumbnail': thumbnail_url,
	298	'duration': duration,
	299	'view_count': view_count,
	300	}
	301
	302	def _extract_video_formats(self, video_data, video_id):
	303	formats = []
	304
	305	progressive_url = video_data.get('progressiveUrl')
	306	if progressive_url:
	307	formats.append({
	308	'url': progressive_url,
	309	'format_id': 'http',
	310	})
	311
	312	m3u8_url = video_data.get('httpUrl')
	313	if m3u8_url:
	314	formats.extend(self._extract_m3u8_formats(
	315	m3u8_url, video_id, 'mp4', 'm3u8_native',
	316	m3u8_id='hls', fatal=False))
	317
	318	rtsp_url = video_data.get('rtspUrl')
	319	if rtsp_url:
	320	formats.append({
	321	'url': rtsp_url,
	322	'format_id': 'rtsp',
	323	})
	324
	325	return formats
	326
	327	def _extract_folder(self, url, folder_id):
	328	webpage = self._download_webpage(url, folder_id)
	329	paths = orderedSet(re.findall(
	330	r'''(?x)(?:
	331	<li\s+class="folder">\s*<a\s+href="\|
	332	<a\s+href="(?=https?://livestre\.am/)
	333	)([^"]+)"''', webpage))
	334
	335	entries = [{
	336	'_type': 'url',
	337	'url': urllib.parse.urljoin(url, p),
	338	} for p in paths]
	339
	340	return self.playlist_result(entries, folder_id)
	341
	342	def _real_extract(self, url):
	343	mobj = self._match_valid_url(url)
	344	user = mobj.group('user')
	345	url_type = mobj.group('type')
	346	content_id = mobj.group('id')
	347	if url_type == 'folder':
	348	return self._extract_folder(url, content_id)
	349	else:
	350	# this url is used on mobile devices
	351	stream_url = f'http://x{user}x.api.channel.livestream.com/3.0/getstream.json'
	352	info = {}
	353	if content_id:
	354	stream_url += f'?id={content_id}'
	355	info = self._extract_video_info(user, content_id)
	356	else:
	357	content_id = user
	358	webpage = self._download_webpage(url, content_id)
	359	info = {
	360	'title': self._og_search_title(webpage),
	361	'description': self._og_search_description(webpage),
	362	'thumbnail': self._search_regex(r'channelLogo\.src\s=\s"([^"]+)"', webpage, 'thumbnail', None),
	363	}
	364	video_data = self._download_json(stream_url, content_id)
	365	is_live = video_data.get('isLive')
	366	info.update({
	367	'id': content_id,
	368	'title': info['title'],
	369	'formats': self._extract_video_formats(video_data, content_id),
	370	'is_live': is_live,
	371	})
	372	return info
	373
	374
	375	# The server doesn't support HEAD requests, so the generic extractor can't detect
	376	# the redirection
	377	class LivestreamShortenerIE(InfoExtractor):
	378	IE_NAME = 'livestream:shortener'
	379	IE_DESC = False # Do not list
	380	_VALID_URL = r'https?://livestre\.am/(?P<id>.+)'
	381
	382	def _real_extract(self, url):
	383	video_id = self._match_id(url)
	384	webpage = self._download_webpage(url, video_id)
	385
	386	return self.url_result(self._og_search_url(webpage))