jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import base64
	2	import functools
	3	import math
	4	import re
	5	import time
	6	import urllib.parse
	7
	8	from .common import InfoExtractor
	9	from .slideslive import SlidesLiveIE
	10	from ..utils import (
	11	ExtractorError,
	12	InAdvancePagedList,
	13	int_or_none,
	14	traverse_obj,
	15	update_url_query,
	16	url_or_none,
	17	)
	18
	19
	20	class VideoKenBaseIE(InfoExtractor):
	21	_ORGANIZATIONS = {
	22	'videos.icts.res.in': 'icts',
	23	'videos.cncf.io': 'cncf',
	24	'videos.neurips.cc': 'neurips',
	25	}
	26	_BASE_URL_RE = rf'https?://(?P<host>{"\|".join(map(re.escape, _ORGANIZATIONS))})/'
	27
	28	_PAGE_SIZE = 12
	29
	30	def _get_org_id_and_api_key(self, org, video_id):
	31	details = self._download_json(
	32	f'https://analytics.videoken.com/api/videolake/{org}/details', video_id,
	33	note='Downloading organization ID and API key', headers={
	34	'Accept': 'application/json',
	35	})
	36	return details['id'], details['apikey']
	37
	38	def _create_slideslive_url(self, video_url, video_id, referer):
	39	if not video_url and not video_id:
	40	return
	41	elif not video_url or 'embed/sign-in' in video_url:
	42	video_url = f'https://slideslive.com/embed/{video_id.lstrip("slideslive-")}'
	43	if url_or_none(referer):
	44	return update_url_query(video_url, {
	45	'embed_parent_url': referer,
	46	'embed_container_origin': f'https://{urllib.parse.urlparse(referer).netloc}',
	47	})
	48	return video_url
	49
	50	def _extract_videos(self, videos, url):
	51	for video in traverse_obj(videos, (('videos', 'results'), ...)):
	52	video_id = traverse_obj(video, 'youtube_id', 'videoid')
	53	if not video_id:
	54	continue
	55	ie_key = None
	56	if traverse_obj(video, 'type', 'source') == 'youtube':
	57	video_url = video_id
	58	ie_key = 'Youtube'
	59	else:
	60	video_url = traverse_obj(video, 'embed_url', 'embeddableurl')
	61	if urllib.parse.urlparse(video_url).netloc == 'slideslive.com':
	62	ie_key = SlidesLiveIE
	63	video_url = self._create_slideslive_url(video_url, video_id, url)
	64	if not video_url:
	65	continue
	66	yield self.url_result(video_url, ie_key, video_id)
	67
	68
	69	class VideoKenIE(VideoKenBaseIE):
	70	_VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:(?:topic\|category)/[^/#?]+/)?video/(?P<id>[\w-]+)'
	71	_TESTS = [{
	72	# neurips -> videoken -> slideslive
	73	'url': 'https://videos.neurips.cc/video/slideslive-38922815',
	74	'info_dict': {
	75	'id': '38922815',
	76	'ext': 'mp4',
	77	'title': 'Efficient Processing of Deep Neural Network: from Algorithms to Hardware Architectures',
	78	'timestamp': 1630939331,
	79	'upload_date': '20210906',
	80	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
	81	'thumbnails': 'count:330',
	82	'chapters': 'count:329',
	83	},
	84	'params': {
	85	'skip_download': 'm3u8',
	86	},
	87	'expected_warnings': ['Failed to download VideoKen API JSON'],
	88	}, {
	89	# neurips -> videoken -> slideslive -> youtube
	90	'url': 'https://videos.neurips.cc/topic/machine%20learning/video/slideslive-38923348',
	91	'info_dict': {
	92	'id': '2Xa_dt78rJE',
	93	'ext': 'mp4',
	94	'display_id': '38923348',
	95	'title': 'Machine Education',
	96	'description': 'Watch full version of this video at https://slideslive.com/38923348.',
	97	'channel': 'SlidesLive Videos - G2',
	98	'channel_id': 'UCOExahQQ588Da8Nft_Ltb9w',
	99	'channel_url': 'https://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
	100	'uploader': 'SlidesLive Videos - G2',
	101	'uploader_id': 'UCOExahQQ588Da8Nft_Ltb9w',
	102	'uploader_url': 'http://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
	103	'duration': 2504,
	104	'timestamp': 1618922125,
	105	'upload_date': '20200131',
	106	'age_limit': 0,
	107	'channel_follower_count': int,
	108	'view_count': int,
	109	'availability': 'unlisted',
	110	'live_status': 'not_live',
	111	'playable_in_embed': True,
	112	'categories': ['People & Blogs'],
	113	'tags': [],
	114	'thumbnail': r're:^https?://.*\.(?:jpg\|webp)',
	115	'thumbnails': 'count:78',
	116	'chapters': 'count:77',
	117	},
	118	'params': {
	119	'skip_download': 'm3u8',
	120	},
	121	'expected_warnings': ['Failed to download VideoKen API JSON'],
	122	}, {
	123	# icts -> videoken -> youtube
	124	'url': 'https://videos.icts.res.in/topic/random%20variable/video/zysIsojYdvc',
	125	'info_dict': {
	126	'id': 'zysIsojYdvc',
	127	'ext': 'mp4',
	128	'title': 'Small-worlds, complex networks and random graphs (Lecture 3) by Remco van der Hofstad',
	129	'description': 'md5:87433069d79719eeadc1962cc2ace00b',
	130	'channel': 'International Centre for Theoretical Sciences',
	131	'channel_id': 'UCO3xnVTHzB7l-nc8mABUJIQ',
	132	'channel_url': 'https://www.youtube.com/channel/UCO3xnVTHzB7l-nc8mABUJIQ',
	133	'uploader': 'International Centre for Theoretical Sciences',
	134	'uploader_id': 'ICTStalks',
	135	'uploader_url': 'http://www.youtube.com/user/ICTStalks',
	136	'duration': 3372,
	137	'upload_date': '20191004',
	138	'age_limit': 0,
	139	'live_status': 'not_live',
	140	'availability': 'public',
	141	'playable_in_embed': True,
	142	'channel_follower_count': int,
	143	'like_count': int,
	144	'view_count': int,
	145	'categories': ['Science & Technology'],
	146	'tags': [],
	147	'thumbnail': r're:^https?://.*\.(?:jpg\|webp)',
	148	'thumbnails': 'count:42',
	149	'chapters': 'count:20',
	150	},
	151	'params': {
	152	'skip_download': 'm3u8',
	153	},
	154	}, {
	155	'url': 'https://videos.cncf.io/category/478/video/IL4nxbmUIX8',
	156	'only_matching': True,
	157	}, {
	158	'url': 'https://videos.cncf.io/topic/kubernetes/video/YAM2d7yTrrI',
	159	'only_matching': True,
	160	}, {
	161	'url': 'https://videos.icts.res.in/video/d7HuP_abpKU',
	162	'only_matching': True,
	163	}]
	164
	165	def _real_extract(self, url):
	166	hostname, video_id = self._match_valid_url(url).group('host', 'id')
	167	org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], video_id)
	168	details = self._download_json(
	169	'https://analytics.videoken.com/api/videoinfo_private', video_id, query={
	170	'videoid': video_id,
	171	'org_id': org_id,
	172	}, headers={'Accept': 'application/json'}, note='Downloading VideoKen API JSON',
	173	errnote='Failed to download VideoKen API JSON', fatal=False)
	174	if details:
	175	return next(self._extract_videos({'videos': [details]}, url))
	176	# fallback for API error 400 response
	177	elif video_id.startswith('slideslive-'):
	178	return self.url_result(
	179	self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id)
	180	elif re.match(r'^[\w-]{11}$', video_id):
	181	self.url_result(video_id, 'Youtube', video_id)
	182	else:
	183	raise ExtractorError('Unable to extract without VideoKen API response')
	184
	185
	186	class VideoKenPlayerIE(VideoKenBaseIE):
	187	_VALID_URL = r'https?://player\.videoken\.com/embed/slideslive-(?P<id>\d+)'
	188	_TESTS = [{
	189	'url': 'https://player.videoken.com/embed/slideslive-38968434',
	190	'info_dict': {
	191	'id': '38968434',
	192	'ext': 'mp4',
	193	'title': 'Deep Learning with Label Differential Privacy',
	194	'timestamp': 1643377020,
	195	'upload_date': '20220128',
	196	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
	197	'thumbnails': 'count:30',
	198	'chapters': 'count:29',
	199	},
	200	'params': {
	201	'skip_download': 'm3u8',
	202	},
	203	}]
	204
	205	def _real_extract(self, url):
	206	video_id = self._match_id(url)
	207	return self.url_result(
	208	self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id)
	209
	210
	211	class VideoKenPlaylistIE(VideoKenBaseIE):
	212	_VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:category/\d+/)?playlist/(?P<id>\d+)'
	213	_TESTS = [{
	214	'url': 'https://videos.icts.res.in/category/1822/playlist/381',
	215	'playlist_mincount': 117,
	216	'info_dict': {
	217	'id': '381',
	218	'title': 'Cosmology - The Next Decade',
	219	},
	220	}]
	221
	222	def _real_extract(self, url):
	223	hostname, playlist_id = self._match_valid_url(url).group('host', 'id')
	224	org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], playlist_id)
	225	videos = self._download_json(
	226	f'https://analytics.videoken.com/api/{org_id}/playlistitems/{playlist_id}/',
	227	playlist_id, headers={'Accept': 'application/json'}, note='Downloading API JSON')
	228	return self.playlist_result(self._extract_videos(videos, url), playlist_id, videos.get('title'))
	229
	230
	231	class VideoKenCategoryIE(VideoKenBaseIE):
	232	_VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'category/(?P<id>\d+)/?(?:$\|[?#])'
	233	_TESTS = [{
	234	'url': 'https://videos.icts.res.in/category/1822/',
	235	'playlist_mincount': 500,
	236	'info_dict': {
	237	'id': '1822',
	238	'title': 'Programs',
	239	},
	240	}, {
	241	'url': 'https://videos.neurips.cc/category/350/',
	242	'playlist_mincount': 34,
	243	'info_dict': {
	244	'id': '350',
	245	'title': 'NeurIPS 2018',
	246	},
	247	}, {
	248	'url': 'https://videos.cncf.io/category/479/',
	249	'playlist_mincount': 328,
	250	'info_dict': {
	251	'id': '479',
	252	'title': 'KubeCon + CloudNativeCon Europe\'19',
	253	},
	254	}]
	255
	256	def _get_category_page(self, category_id, org_id, page=1, note=None):
	257	return self._download_json(
	258	f'https://analytics.videoken.com/api/videolake/{org_id}/category_videos', category_id,
	259	fatal=False, note=note if note else f'Downloading category page {page}',
	260	query={
	261	'category_id': category_id,
	262	'page_number': page,
	263	'length': self._PAGE_SIZE,
	264	}, headers={'Accept': 'application/json'}) or {}
	265
	266	def _entries(self, category_id, org_id, url, page):
	267	videos = self._get_category_page(category_id, org_id, page + 1)
	268	yield from self._extract_videos(videos, url)
	269
	270	def _real_extract(self, url):
	271	hostname, category_id = self._match_valid_url(url).group('host', 'id')
	272	org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], category_id)
	273	category_info = self._get_category_page(category_id, org_id, note='Downloading category info')
	274	category = category_info['category_name']
	275	total_pages = math.ceil(int(category_info['recordsTotal']) / self._PAGE_SIZE)
	276	return self.playlist_result(InAdvancePagedList(
	277	functools.partial(self._entries, category_id, org_id, url),
	278	total_pages, self._PAGE_SIZE), category_id, category)
	279
	280
	281	class VideoKenTopicIE(VideoKenBaseIE):
	282	_VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'topic/(?P<id>[^/#?]+)/?(?:$\|[?#])'
	283	_TESTS = [{
	284	'url': 'https://videos.neurips.cc/topic/machine%20learning/',
	285	'playlist_mincount': 500,
	286	'info_dict': {
	287	'id': 'machine_learning',
	288	'title': 'machine learning',
	289	},
	290	}, {
	291	'url': 'https://videos.icts.res.in/topic/gravitational%20waves/',
	292	'playlist_mincount': 77,
	293	'info_dict': {
	294	'id': 'gravitational_waves',
	295	'title': 'gravitational waves'
	296	},
	297	}, {
	298	'url': 'https://videos.cncf.io/topic/prometheus/',
	299	'playlist_mincount': 134,
	300	'info_dict': {
	301	'id': 'prometheus',
	302	'title': 'prometheus',
	303	},
	304	}]
	305
	306	def _get_topic_page(self, topic, org_id, search_id, api_key, page=1, note=None):
	307	return self._download_json(
	308	'https://es.videoken.com/api/v1.0/get_results', topic, fatal=False, query={
	309	'orgid': org_id,
	310	'size': self._PAGE_SIZE,
	311	'query': topic,
	312	'page': page,
	313	'sort': 'upload_desc',
	314	'filter': 'all',
	315	'token': api_key,
	316	'is_topic': 'true',
	317	'category': '',
	318	'searchid': search_id,
	319	}, headers={'Accept': 'application/json'},
	320	note=note if note else f'Downloading topic page {page}') or {}
	321
	322	def _entries(self, topic, org_id, search_id, api_key, url, page):
	323	videos = self._get_topic_page(topic, org_id, search_id, api_key, page + 1)
	324	yield from self._extract_videos(videos, url)
	325
	326	def _real_extract(self, url):
	327	hostname, topic_id = self._match_valid_url(url).group('host', 'id')
	328	topic = urllib.parse.unquote(topic_id)
	329	topic_id = topic.replace(' ', '_')
	330	org_id, api_key = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], topic)
	331	search_id = base64.b64encode(f':{topic}:{int(time.time())}:transient'.encode()).decode()
	332	total_pages = int_or_none(self._get_topic_page(
	333	topic, org_id, search_id, api_key, note='Downloading topic info')['total_no_of_pages'])
	334	return self.playlist_result(InAdvancePagedList(
	335	functools.partial(self._entries, topic, org_id, search_id, api_key, url),
	336	total_pages, self._PAGE_SIZE), topic_id, topic)