jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import itertools
	2	import re
	3
	4	from .common import InfoExtractor
	5	from ..compat import compat_HTTPError
	6	from ..utils import (
	7	int_or_none,
	8	parse_iso8601,
	9	traverse_obj,
	10	unescapeHTML,
	11	ExtractorError,
	12	)
	13
	14
	15	class RumbleEmbedIE(InfoExtractor):
	16	_VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
	17	_EMBED_REGEX = [fr'(?:<(?:script\|iframe)[^>]+\bsrc=\|["\']embedUrl["\']\s:\s)["\'](?P<url>{_VALID_URL})']
	18	_TESTS = [{
	19	'url': 'https://rumble.com/embed/v5pv5f',
	20	'md5': '36a18a049856720189f30977ccbb2c34',
	21	'info_dict': {
	22	'id': 'v5pv5f',
	23	'ext': 'mp4',
	24	'title': 'WMAR 2 News Latest Headlines \| October 20, 6pm',
	25	'timestamp': 1571611968,
	26	'upload_date': '20191020',
	27	'channel_url': 'https://rumble.com/c/WMAR',
	28	'channel': 'WMAR',
	29	'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg',
	30	'duration': 234,
	31	'uploader': 'WMAR',
	32	'live_status': 'not_live',
	33	}
	34	}, {
	35	'url': 'https://rumble.com/embed/vslb7v',
	36	'md5': '7418035de1a30a178b8af34dc2b6a52b',
	37	'info_dict': {
	38	'id': 'vslb7v',
	39	'ext': 'mp4',
	40	'title': 'Defense Sec. says US Commitment to NATO Defense \'Ironclad\'',
	41	'timestamp': 1645142135,
	42	'upload_date': '20220217',
	43	'channel_url': 'https://rumble.com/c/CyberTechNews',
	44	'channel': 'CTNews',
	45	'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg',
	46	'duration': 901,
	47	'uploader': 'CTNews',
	48	'live_status': 'not_live',
	49	}
	50	}, {
	51	'url': 'https://rumble.com/embed/vunh1h',
	52	'info_dict': {
	53	'id': 'vunh1h',
	54	'ext': 'mp4',
	55	'title': '‘Gideon, op zoek naar de waarheid’ including ENG SUBS',
	56	'timestamp': 1647197663,
	57	'upload_date': '20220313',
	58	'channel_url': 'https://rumble.com/user/BLCKBX',
	59	'channel': 'BLCKBX',
	60	'thumbnail': r're:https://.+\.jpg',
	61	'duration': 5069,
	62	'uploader': 'BLCKBX',
	63	'live_status': 'not_live',
	64	'subtitles': {
	65	'en': [
	66	{
	67	'url': r're:https://.+\.vtt',
	68	'name': 'English',
	69	'ext': 'vtt'
	70	}
	71	]
	72	},
	73	},
	74	'params': {'skip_download': True}
	75	}, {
	76	'url': 'https://rumble.com/embed/v1essrt',
	77	'info_dict': {
	78	'id': 'v1essrt',
	79	'ext': 'mp4',
	80	'title': 'startswith:lofi hip hop radio - beats to relax/study',
	81	'timestamp': 1661519399,
	82	'upload_date': '20220826',
	83	'channel_url': 'https://rumble.com/c/LofiGirl',
	84	'channel': 'Lofi Girl',
	85	'thumbnail': r're:https://.+\.jpg',
	86	'duration': None,
	87	'uploader': 'Lofi Girl',
	88	'live_status': 'is_live',
	89	},
	90	'params': {'skip_download': True}
	91	}, {
	92	'url': 'https://rumble.com/embed/v1amumr',
	93	'info_dict': {
	94	'id': 'v1amumr',
	95	'ext': 'webm',
	96	'fps': 60,
	97	'title': 'Turning Point USA 2022 Student Action Summit DAY 1 - Rumble Exclusive Live',
	98	'timestamp': 1658518457,
	99	'upload_date': '20220722',
	100	'channel_url': 'https://rumble.com/c/RumbleEvents',
	101	'channel': 'Rumble Events',
	102	'thumbnail': r're:https://.+\.jpg',
	103	'duration': 16427,
	104	'uploader': 'Rumble Events',
	105	'live_status': 'was_live',
	106	},
	107	'params': {'skip_download': True}
	108	}, {
	109	'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
	110	'only_matching': True,
	111	}]
	112
	113	_WEBPAGE_TESTS = [
	114	{
	115	'note': 'Rumble embed',
	116	'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
	117	'md5': '53af34098a7f92c4e51cf0bd1c33f009',
	118	'info_dict': {
	119	'id': 'vb0ofn',
	120	'ext': 'mp4',
	121	'timestamp': 1612662578,
	122	'uploader': 'LovingMontana',
	123	'channel': 'LovingMontana',
	124	'upload_date': '20210207',
	125	'title': 'Winter-loving dog helps girls dig a snow fort ',
	126	'channel_url': 'https://rumble.com/c/c-546523',
	127	'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg',
	128	'duration': 103,
	129	'live_status': 'not_live',
	130	}
	131	},
	132	{
	133	'note': 'Rumble JS embed',
	134	'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it',
	135	'md5': '4701209ac99095592e73dbba21889690',
	136	'info_dict': {
	137	'id': 'v15eqxl',
	138	'ext': 'mp4',
	139	'channel': 'Mr Producer Media',
	140	'duration': 92,
	141	'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
	142	'channel_url': 'https://rumble.com/c/RichSementa',
	143	'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.OvCc-small-911-Audio-From-The-Man-Who-.jpg',
	144	'timestamp': 1654892716,
	145	'uploader': 'Mr Producer Media',
	146	'upload_date': '20220610',
	147	'live_status': 'not_live',
	148	}
	149	},
	150	]
	151
	152	@classmethod
	153	def _extract_embed_urls(cls, url, webpage):
	154	embeds = tuple(super()._extract_embed_urls(url, webpage))
	155	if embeds:
	156	return embeds
	157	return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
	158	r'<script>\sRumble\(\s"play"\s,\s{\s[\'"]video[\'"]\s:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
	159
	160	def _real_extract(self, url):
	161	video_id = self._match_id(url)
	162	video = self._download_json(
	163	'https://rumble.com/embedJS/u3/', video_id,
	164	query={'request': 'video', 'ver': 2, 'v': video_id})
	165
	166	sys_msg = traverse_obj(video, ('sys', 'msg'))
	167	if sys_msg:
	168	self.report_warning(sys_msg, video_id=video_id)
	169
	170	if video.get('live') == 0:
	171	live_status = 'not_live' if video.get('livestream_has_dvr') is None else 'was_live'
	172	elif video.get('live') == 1:
	173	live_status = 'is_upcoming' if video.get('livestream_has_dvr') else 'was_live'
	174	elif video.get('live') == 2:
	175	live_status = 'is_live'
	176	else:
	177	live_status = None
	178
	179	formats = []
	180	for ext, ext_info in (video.get('ua') or {}).items():
	181	for height, video_info in (ext_info or {}).items():
	182	meta = video_info.get('meta') or {}
	183	if not video_info.get('url'):
	184	continue
	185	if ext == 'hls':
	186	if meta.get('live') is True and video.get('live') == 1:
	187	live_status = 'post_live'
	188	formats.extend(self._extract_m3u8_formats(
	189	video_info['url'], video_id,
	190	ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
	191	continue
	192	formats.append({
	193	'ext': ext,
	194	'url': video_info['url'],
	195	'format_id': '%s-%sp' % (ext, height),
	196	'height': int_or_none(height),
	197	'fps': video.get('fps'),
	198	**traverse_obj(meta, {
	199	'tbr': 'bitrate',
	200	'filesize': 'size',
	201	'width': 'w',
	202	'height': 'h',
	203	}, default={})
	204	})
	205
	206	subtitles = {
	207	lang: [{
	208	'url': sub_info['path'],
	209	'name': sub_info.get('language') or '',
	210	}] for lang, sub_info in (video.get('cc') or {}).items() if sub_info.get('path')
	211	}
	212
	213	author = video.get('author') or {}
	214	thumbnails = traverse_obj(video, ('t', ..., {'url': 'i', 'width': 'w', 'height': 'h'}))
	215	if not thumbnails and video.get('i'):
	216	thumbnails = [{'url': video['i']}]
	217
	218	if live_status in {'is_live', 'post_live'}:
	219	duration = None
	220	else:
	221	duration = int_or_none(video.get('duration'))
	222
	223	return {
	224	'id': video_id,
	225	'title': unescapeHTML(video.get('title')),
	226	'formats': formats,
	227	'subtitles': subtitles,
	228	'thumbnails': thumbnails,
	229	'timestamp': parse_iso8601(video.get('pubDate')),
	230	'channel': author.get('name'),
	231	'channel_url': author.get('url'),
	232	'duration': duration,
	233	'uploader': author.get('name'),
	234	'live_status': live_status,
	235	}
	236
	237
	238	class RumbleChannelIE(InfoExtractor):
	239	_VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c\|user)/(?P<id>[^&?#$/]+))'
	240
	241	_TESTS = [{
	242	'url': 'https://rumble.com/c/Styxhexenhammer666',
	243	'playlist_mincount': 1160,
	244	'info_dict': {
	245	'id': 'Styxhexenhammer666',
	246	},
	247	}, {
	248	'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
	249	'playlist_mincount': 4,
	250	'info_dict': {
	251	'id': 'goldenpoodleharleyeuna',
	252	},
	253	}]
	254
	255	def entries(self, url, playlist_id):
	256	for page in itertools.count(1):
	257	try:
	258	webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note='Downloading page %d' % page)
	259	except ExtractorError as e:
	260	if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
	261	break
	262	raise
	263	for video_url in re.findall(r'class=video-item--a\s?href=([^>]+\.html)', webpage):
	264	yield self.url_result('https://rumble.com' + video_url)
	265
	266	def _real_extract(self, url):
	267	url, playlist_id = self._match_valid_url(url).groups()
	268	return self.playlist_result(self.entries(url, playlist_id), playlist_id=playlist_id)