[yt-dlp.git] / yt_dlp / extractor / rumble.py

import itertools
import re

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    UnsupportedError,
    clean_html,
    determine_ext,
    format_field,
    get_element_by_class,
    int_or_none,
    join_nonempty,
    parse_count,
    parse_iso8601,
    traverse_obj,
    unescapeHTML,
)


class RumbleEmbedIE(InfoExtractor):
    """Extractor for Rumble embed URLs (``rumble.com/embed/<id>``).

    Metadata and format URLs are read from the ``embedJS/u3`` JSON endpoint.
    """

    _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
    _EMBED_REGEX = [fr'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://rumble.com/embed/v5pv5f',
        'md5': '36a18a049856720189f30977ccbb2c34',
        'info_dict': {
            'id': 'v5pv5f',
            'ext': 'mp4',
            'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
            'timestamp': 1571611968,
            'upload_date': '20191020',
            'channel_url': 'https://rumble.com/c/WMAR',
            'channel': 'WMAR',
            'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg',
            'duration': 234,
            'uploader': 'WMAR',
            'live_status': 'not_live',
        }
    }, {
        'url': 'https://rumble.com/embed/vslb7v',
        'md5': '7418035de1a30a178b8af34dc2b6a52b',
        'info_dict': {
            'id': 'vslb7v',
            'ext': 'mp4',
            'title': 'Defense Sec. says US Commitment to NATO Defense \'Ironclad\'',
            'timestamp': 1645142135,
            'upload_date': '20220217',
            'channel_url': 'https://rumble.com/c/CyberTechNews',
            'channel': 'CTNews',
            'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg',
            'duration': 901,
            'uploader': 'CTNews',
            'live_status': 'not_live',
        }
    }, {
        'url': 'https://rumble.com/embed/vunh1h',
        'info_dict': {
            'id': 'vunh1h',
            'ext': 'mp4',
            'title': '‘Gideon, op zoek naar de waarheid’ including ENG SUBS',
            'timestamp': 1647197663,
            'upload_date': '20220313',
            'channel_url': 'https://rumble.com/user/BLCKBX',
            'channel': 'BLCKBX',
            'thumbnail': r're:https://.+\.jpg',
            'duration': 5069,
            'uploader': 'BLCKBX',
            'live_status': 'not_live',
            'subtitles': {
                'en': [
                    {
                        'url': r're:https://.+\.vtt',
                        'name': 'English',
                        'ext': 'vtt'
                    }
                ]
            },
        },
        'params': {'skip_download': True}
    }, {
        'url': 'https://rumble.com/embed/v1essrt',
        'info_dict': {
            'id': 'v1essrt',
            'ext': 'mp4',
            'title': 'startswith:lofi hip hop radio 📚 - beats to relax/study to',
            'timestamp': 1661519399,
            'upload_date': '20220826',
            'channel_url': 'https://rumble.com/c/LofiGirl',
            'channel': 'Lofi Girl',
            'thumbnail': r're:https://.+\.jpg',
            'duration': None,
            'uploader': 'Lofi Girl',
            'live_status': 'is_live',
        },
        'params': {'skip_download': True}
    }, {
        'url': 'https://rumble.com/embed/v1amumr',
        'info_dict': {
            'id': 'v1amumr',
            'ext': 'mp4',
            'fps': 60,
            'title': 'Turning Point USA 2022 Student Action Summit DAY 1  - Rumble Exclusive Live',
            'timestamp': 1658518457,
            'upload_date': '20220722',
            'channel_url': 'https://rumble.com/c/RumbleEvents',
            'channel': 'Rumble Events',
            'thumbnail': r're:https://.+\.jpg',
            'duration': 16427,
            'uploader': 'Rumble Events',
            'live_status': 'was_live',
        },
        'params': {'skip_download': True}
    }, {
        'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
        'only_matching': True,
    }]

    _WEBPAGE_TESTS = [
        {
            'note': 'Rumble JS embed',
            'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it',
            'md5': '4701209ac99095592e73dbba21889690',
            'info_dict': {
                'id': 'v15eqxl',
                'ext': 'mp4',
                'channel': 'Mr Producer Media',
                'duration': 92,
                'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
                'channel_url': 'https://rumble.com/c/RichSementa',
                'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.qR4e-small-911-Audio-From-The-Man-Who-.jpg',
                'timestamp': 1654892716,
                'uploader': 'Mr Producer Media',
                'upload_date': '20220610',
                'live_status': 'not_live',
            }
        },
    ]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Return the Rumble embed URLs found in ``webpage``.

        The generic ``_EMBED_REGEX`` matches (script/iframe ``src`` and
        ``embedUrl`` values) take precedence. Only when there are none is the
        page scanned for inline ``Rumble("play", {... video: "<id>"})`` script
        calls, from which embed URLs are built.
        """
        embeds = tuple(super()._extract_embed_urls(url, webpage))
        if embeds:
            return embeds
        return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
            r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{[^}]*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]

    def _real_extract(self, url):
        """Fetch the embed JSON for ``url`` and build the info dict.

        Returns a dict with the video id, title, formats, subtitles,
        thumbnails, timestamp, channel/uploader fields, duration and live
        status. Any ``sys.msg`` present in the response is reported as a
        warning rather than treated as fatal.
        """
        video_id = self._match_id(url)
        video = self._download_json(
            'https://rumble.com/embedJS/u3/', video_id,
            query={'request': 'video', 'ver': 2, 'v': video_id})

        sys_msg = traverse_obj(video, ('sys', 'msg'))
        if sys_msg:
            self.report_warning(sys_msg, video_id=video_id)

        # Map the numeric `live` field (together with `livestream_has_dvr`)
        # onto a live_status value; unknown values leave it undetermined.
        if video.get('live') == 0:
            live_status = 'not_live' if video.get('livestream_has_dvr') is None else 'was_live'
        elif video.get('live') == 1:
            live_status = 'is_upcoming' if video.get('livestream_has_dvr') else 'was_live'
        elif video.get('live') == 2:
            live_status = 'is_live'
        else:
            live_status = None

        formats = []
        for format_type, format_info in (video.get('ua') or {}).items():
            if isinstance(format_info, dict):
                # Dict-shaped groups are keyed by what is used here as the
                # height; backfill it when the entry's own meta lacks a usable one.
                for height, video_info in format_info.items():
                    if not traverse_obj(video_info, ('meta', 'h', {int_or_none})):
                        video_info.setdefault('meta', {})['h'] = height
                format_info = format_info.values()

            # Decide this once per group. The group key must not be overwritten
            # inside the loop below, otherwise only the first entry of a
            # 'timeline' group would be recognised as a timeline format.
            is_timeline = format_type == 'timeline'

            for video_info in format_info:
                meta = video_info.get('meta') or {}
                if not video_info.get('url'):
                    continue
                if format_type == 'hls':
                    if meta.get('live') is True and video.get('live') == 1:
                        live_status = 'post_live'
                    formats.extend(self._extract_m3u8_formats(
                        video_info['url'], video_id,
                        ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
                    continue
                # 'timeline' is not a file extension, so derive the real one
                # from the URL; other group keys are used as the extension.
                ext = determine_ext(video_info['url']) if is_timeline else format_type
                formats.append({
                    'ext': ext,
                    'acodec': 'none' if is_timeline else None,
                    'url': video_info['url'],
                    'format_id': join_nonempty(ext, format_field(meta, 'h', '%sp')),
                    'format_note': 'Timeline' if is_timeline else None,
                    'fps': None if is_timeline else video.get('fps'),
                    **traverse_obj(meta, {
                        'tbr': 'bitrate',
                        'filesize': 'size',
                        'width': 'w',
                        'height': 'h',
                    }, expected_type=lambda x: int(x) or None)
                })

        # One subtitle track per language key in `cc`; entries without a path are skipped.
        subtitles = {
            lang: [{
                'url': sub_info['path'],
                'name': sub_info.get('language') or '',
            }] for lang, sub_info in (video.get('cc') or {}).items() if sub_info.get('path')
        }

        author = video.get('author') or {}
        thumbnails = traverse_obj(video, ('t', ..., {'url': 'i', 'width': 'w', 'height': 'h'}))
        # Fall back to the single top-level image when no thumbnail list is present.
        if not thumbnails and video.get('i'):
            thumbnails = [{'url': video['i']}]

        # No duration is reported while the stream is live or in post-live state.
        if live_status in {'is_live', 'post_live'}:
            duration = None
        else:
            duration = int_or_none(video.get('duration'))

        return {
            'id': video_id,
            'title': unescapeHTML(video.get('title')),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(video.get('pubDate')),
            'channel': author.get('name'),
            'channel_url': author.get('url'),
            'duration': duration,
            'uploader': author.get('name'),
            'live_status': live_status,
        }


class RumbleIE(InfoExtractor):
    """Extractor for Rumble watch pages (``rumble.com/v...``).

    The page is scanned for a Rumble embed, to which extraction is delegated
    as ``url_transparent``; page-only metadata (release time, counts,
    description) is layered on top.
    """

    _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$'
    _EMBED_REGEX = [
        r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>',
        r'<a[^>]+class="videostream__link link"[^>]+href=(?P<url>/v[\w.-]+\.html)[^>]*>']
    _TESTS = [{
        'add_ie': ['RumbleEmbed'],
        'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
        'md5': '53af34098a7f92c4e51cf0bd1c33f009',
        'info_dict': {
            'id': 'vb0ofn',
            'ext': 'mp4',
            'timestamp': 1612662578,
            'uploader': 'LovingMontana',
            'channel': 'LovingMontana',
            'upload_date': '20210207',
            'title': 'Winter-loving dog helps girls dig a snow fort ',
            'description': 'Moose the dog is more than happy to help with digging out this epic snow fort. Great job, Moose!',
            'channel_url': 'https://rumble.com/c/c-546523',
            'thumbnail': r're:https://.+\.jpg',
            'duration': 103,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'live_status': 'not_live',
        }
    }, {
        'url': 'http://www.rumble.com/vDMUM1?key=value',
        'only_matching': True,
    }, {
        'note': 'timeline format',
        'url': 'https://rumble.com/v2ea9qb-the-u.s.-cannot-hide-this-in-ukraine-anymore-redacted-with-natali-and-clayt.html',
        'md5': '40d61fec6c0945bca3d0e1dc1aa53d79',
        'params': {'format': 'wv'},
        'info_dict': {
            'id': 'v2bou5f',
            'ext': 'mp4',
            'uploader': 'Redacted News',
            'upload_date': '20230322',
            'timestamp': 1679445010,
            'title': 'The U.S. CANNOT hide this in Ukraine anymore | Redacted with Natali and Clayton Morris',
            'duration': 892,
            'channel': 'Redacted News',
            'description': 'md5:aaad0c5c3426d7a361c29bdaaced7c42',
            'channel_url': 'https://rumble.com/c/Redacted',
            'live_status': 'not_live',
            'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg',
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
        },
    }, {
        'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html',
        'info_dict': {
            'id': 'v2blzyy',
            'ext': 'mp4',
            'live_status': 'was_live',
            'release_timestamp': 1679446804,
            'description': 'md5:2ac4908ccfecfb921f8ffa4b30c1e636',
            'release_date': '20230322',
            'timestamp': 1679445692,
            'duration': 4435,
            'upload_date': '20230322',
            'title': 'The Covid Twitter Files Drop: Protecting Fauci While Censoring The Truth w/Matt Taibbi',
            'uploader': 'Kim Iversen',
            'channel_url': 'https://rumble.com/c/KimIversen',
            'channel': 'Kim Iversen',
            'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg',
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
        },
    }]

    _WEBPAGE_TESTS = [{
        'url': 'https://rumble.com/videos?page=2',
        'playlist_mincount': 24,
        'info_dict': {
            'id': 'videos?page=2',
            'title': 'All videos',
            'description': 'Browse videos uploaded to Rumble.com',
            'age_limit': 0,
        },
    }, {
        'url': 'https://rumble.com/browse/live',
        'playlist_mincount': 25,
        'info_dict': {
            'id': 'live',
            'title': 'Browse',
            'age_limit': 0,
        },
    }, {
        'url': 'https://rumble.com/search/video?q=rumble&sort=views',
        'playlist_mincount': 24,
        'info_dict': {
            'id': 'video?q=rumble&sort=views',
            'title': 'Search results for: rumble',
            'age_limit': 0,
        },
    }]

    def _real_extract(self, url):
        """Resolve a watch page to its embed and add page-level metadata.

        Raises UnsupportedError when no Rumble embed is found in the page.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        # Only the first embed found in the page is used.
        embed = next(RumbleEmbedIE.extract_from_webpage(self._downloader, url, webpage), None)
        if not embed:
            raise UnsupportedError(url)

        def search(pattern, name):
            # Every page-level field is optional: a miss yields None.
            return self._search_regex(pattern, webpage, name, default=None)

        release_date = search(
            r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)', 'release date')
        views = search(r'"userInteractionCount"\s*:\s*(\d+)', 'view count')
        likes = search(r'<span data-js="rumbles_up_votes">\s*([\d,.KM]+)', 'like count')
        dislikes = search(r'<span data-js="rumbles_down_votes">\s*([\d,.KM]+)', 'dislike count')
        description_html = get_element_by_class('media-description', webpage)

        return {
            '_type': 'url_transparent',
            'ie_key': embed['ie_key'],
            'url': embed['url'],
            'release_timestamp': parse_iso8601(release_date),
            'view_count': int_or_none(views),
            'like_count': parse_count(likes),
            'dislike_count': parse_count(dislikes),
            'description': clean_html(description_html),
        }


class RumbleChannelIE(InfoExtractor):
    """Playlist extractor for Rumble channel (``/c/<id>``) and user (``/user/<id>``) pages."""

    _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))'

    _TESTS = [{
        'url': 'https://rumble.com/c/Styxhexenhammer666',
        'playlist_mincount': 1160,
        'info_dict': {
            'id': 'Styxhexenhammer666',
        },
    }, {
        'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'goldenpoodleharleyeuna',
        },
    }]

    def entries(self, url, playlist_id):
        """Yield a url_result for each video link, page by page.

        Pages are requested as ``?page=1, 2, ...`` until a request fails with
        HTTP 404, which ends the iteration. Any other download error is
        re-raised.
        """
        page_num = 1
        while True:
            try:
                webpage = self._download_webpage(
                    f'{url}?page={page_num}', playlist_id, note='Downloading page %d' % page_num)
            except ExtractorError as err:
                not_found = isinstance(err.cause, HTTPError) and err.cause.status == 404
                if not not_found:
                    raise
                # A 404 marks the end of the listing.
                return
            for link in re.finditer(r'class=video-item--a\s?href=([^>]+\.html)', webpage):
                yield self.url_result('https://rumble.com' + link.group(1))
            page_num += 1

    def _real_extract(self, url):
        """Build a lazily-evaluated playlist from the matched channel/user URL."""
        mobj = self._match_valid_url(url)
        # Paginate on the matched base URL so anything trailing it in the
        # input (query string, fragment, extra path) is dropped.
        base_url = mobj.group('url')
        playlist_id = mobj.group('id')
        return self.playlist_result(self.entries(base_url, playlist_id), playlist_id=playlist_id)
Commit	Line	Data
f1d42a83	1	import itertools
62852977	2	import re
62852977	3
70c5802b	4	from .common import InfoExtractor
3d2623a8	5	from ..networking.exceptions import HTTPError
70c5802b	6	from ..utils import (
839e2a62 M	7	ExtractorError,
	8	UnsupportedError,
	9	clean_html,
78bc1868	10	determine_ext,
6994afc0	11	format_field,
839e2a62	12	get_element_by_class,
70c5802b	13	int_or_none,
6994afc0	14	join_nonempty,
839e2a62	15	parse_count,
70c5802b	16	parse_iso8601,
0d8affc1	17	traverse_obj,
4e34889f	18	unescapeHTML,
70c5802b	19	)
	20
	21
	22	class RumbleEmbedIE(InfoExtractor):
	23	_VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
bfd973ec	24	_EMBED_REGEX = [fr'(?:<(?:script\|iframe)[^>]+\bsrc=\|["\']embedUrl["\']\s:\s)["\'](?P<url>{_VALID_URL})']
70c5802b	25	_TESTS = [{
	26	'url': 'https://rumble.com/embed/v5pv5f',
	27	'md5': '36a18a049856720189f30977ccbb2c34',
	28	'info_dict': {
	29	'id': 'v5pv5f',
	30	'ext': 'mp4',
	31	'title': 'WMAR 2 News Latest Headlines \| October 20, 6pm',
	32	'timestamp': 1571611968,
	33	'upload_date': '20191020',
64fa820c	34	'channel_url': 'https://rumble.com/c/WMAR',
64fa820c	35	'channel': 'WMAR',
23d829a3	36	'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg',
64fa820c	37	'duration': 234,
64fa820c	38	'uploader': 'WMAR',
0d8affc1	39	'live_status': 'not_live',
70c5802b	40	}
4e34889f	41	}, {
	42	'url': 'https://rumble.com/embed/vslb7v',
	43	'md5': '7418035de1a30a178b8af34dc2b6a52b',
	44	'info_dict': {
	45	'id': 'vslb7v',
	46	'ext': 'mp4',
	47	'title': 'Defense Sec. says US Commitment to NATO Defense \'Ironclad\'',
	48	'timestamp': 1645142135,
	49	'upload_date': '20220217',
	50	'channel_url': 'https://rumble.com/c/CyberTechNews',
	51	'channel': 'CTNews',
	52	'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg',
	53	'duration': 901,
64fa820c	54	'uploader': 'CTNews',
0d8affc1	55	'live_status': 'not_live',
4e34889f	56	}
0d8affc1 M	57	}, {
	58	'url': 'https://rumble.com/embed/vunh1h',
	59	'info_dict': {
	60	'id': 'vunh1h',
	61	'ext': 'mp4',
	62	'title': '‘Gideon, op zoek naar de waarheid’ including ENG SUBS',
	63	'timestamp': 1647197663,
	64	'upload_date': '20220313',
	65	'channel_url': 'https://rumble.com/user/BLCKBX',
	66	'channel': 'BLCKBX',
	67	'thumbnail': r're:https://.+\.jpg',
	68	'duration': 5069,
	69	'uploader': 'BLCKBX',
	70	'live_status': 'not_live',
	71	'subtitles': {
	72	'en': [
	73	{
	74	'url': r're:https://.+\.vtt',
	75	'name': 'English',
	76	'ext': 'vtt'
	77	}
	78	]
	79	},
	80	},
	81	'params': {'skip_download': True}
	82	}, {
	83	'url': 'https://rumble.com/embed/v1essrt',
	84	'info_dict': {
	85	'id': 'v1essrt',
	86	'ext': 'mp4',
23d829a3	87	'title': 'startswith:lofi hip hop radio 📚 - beats to relax/study to',
0d8affc1 M	88	'timestamp': 1661519399,
	89	'upload_date': '20220826',
	90	'channel_url': 'https://rumble.com/c/LofiGirl',
	91	'channel': 'Lofi Girl',
	92	'thumbnail': r're:https://.+\.jpg',
	93	'duration': None,
	94	'uploader': 'Lofi Girl',
	95	'live_status': 'is_live',
	96	},
	97	'params': {'skip_download': True}
	98	}, {
	99	'url': 'https://rumble.com/embed/v1amumr',
	100	'info_dict': {
	101	'id': 'v1amumr',
23d829a3	102	'ext': 'mp4',
0d8affc1 M	103	'fps': 60,
	104	'title': 'Turning Point USA 2022 Student Action Summit DAY 1 - Rumble Exclusive Live',
	105	'timestamp': 1658518457,
	106	'upload_date': '20220722',
	107	'channel_url': 'https://rumble.com/c/RumbleEvents',
	108	'channel': 'Rumble Events',
	109	'thumbnail': r're:https://.+\.jpg',
	110	'duration': 16427,
	111	'uploader': 'Rumble Events',
	112	'live_status': 'was_live',
	113	},
	114	'params': {'skip_download': True}
70c5802b	115	}, {
	116	'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
	117	'only_matching': True,
	118	}]
	119
0d8affc1	120	_WEBPAGE_TESTS = [
0d8affc1 M	121	{
	122	'note': 'Rumble JS embed',
	123	'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it',
	124	'md5': '4701209ac99095592e73dbba21889690',
	125	'info_dict': {
	126	'id': 'v15eqxl',
	127	'ext': 'mp4',
	128	'channel': 'Mr Producer Media',
	129	'duration': 92,
	130	'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
	131	'channel_url': 'https://rumble.com/c/RichSementa',
23d829a3	132	'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.qR4e-small-911-Audio-From-The-Man-Who-.jpg',
0d8affc1 M	133	'timestamp': 1654892716,
	134	'uploader': 'Mr Producer Media',
	135	'upload_date': '20220610',
	136	'live_status': 'not_live',
	137	}
	138	},
	139	]
	140
79e591b5	141	@classmethod
bfd973ec	142	def _extract_embed_urls(cls, url, webpage):
bfd973ec	143	embeds = tuple(super()._extract_embed_urls(url, webpage))
79e591b5	144	if embeds:
bfd973ec	145	return embeds
79e591b5	146	return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
0ce1f48b	147	r'<script>[^<]\bRumble\(\s"play"\s,\s{[^}][\'"]?video[\'"]?\s:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
62852977	148
70c5802b	149	def _real_extract(self, url):
	150	video_id = self._match_id(url)
	151	video = self._download_json(
0d8affc1 M	152	'https://rumble.com/embedJS/u3/', video_id,
	153	query={'request': 'video', 'ver': 2, 'v': video_id})
	154
	155	sys_msg = traverse_obj(video, ('sys', 'msg'))
	156	if sys_msg:
	157	self.report_warning(sys_msg, video_id=video_id)
	158
	159	if video.get('live') == 0:
	160	live_status = 'not_live' if video.get('livestream_has_dvr') is None else 'was_live'
	161	elif video.get('live') == 1:
	162	live_status = 'is_upcoming' if video.get('livestream_has_dvr') else 'was_live'
	163	elif video.get('live') == 2:
	164	live_status = 'is_live'
	165	else:
	166	live_status = None
70c5802b	167
70c5802b	168	formats = []
0d8affc1	169	for ext, ext_info in (video.get('ua') or {}).items():
6994afc0	170	if isinstance(ext_info, dict):
	171	for height, video_info in ext_info.items():
	172	if not traverse_obj(video_info, ('meta', 'h', {int_or_none})):
	173	video_info.setdefault('meta', {})['h'] = height
	174	ext_info = ext_info.values()
	175
	176	for video_info in ext_info:
0d8affc1 M	177	meta = video_info.get('meta') or {}
	178	if not video_info.get('url'):
	179	continue
	180	if ext == 'hls':
	181	if meta.get('live') is True and video.get('live') == 1:
	182	live_status = 'post_live'
	183	formats.extend(self._extract_m3u8_formats(
	184	video_info['url'], video_id,
	185	ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
	186	continue
78bc1868	187	timeline = ext == 'timeline'
	188	if timeline:
	189	ext = determine_ext(video_info['url'])
0d8affc1 M	190	formats.append({
0d8affc1 M	191	'ext': ext,
78bc1868	192	'acodec': 'none' if timeline else None,
0d8affc1	193	'url': video_info['url'],
6994afc0	194	'format_id': join_nonempty(ext, format_field(meta, 'h', '%sp')),
78bc1868	195	'format_note': 'Timeline' if timeline else None,
78bc1868	196	'fps': None if timeline else video.get('fps'),
0d8affc1 M	197	**traverse_obj(meta, {
	198	'tbr': 'bitrate',
	199	'filesize': 'size',
	200	'width': 'w',
	201	'height': 'h',
acacb57c	202	}, expected_type=lambda x: int(x) or None)
0d8affc1	203	})
70c5802b	204
92922fe7 F	205	subtitles = {
	206	lang: [{
	207	'url': sub_info['path'],
	208	'name': sub_info.get('language') or '',
	209	}] for lang, sub_info in (video.get('cc') or {}).items() if sub_info.get('path')
	210	}
	211
70c5802b	212	author = video.get('author') or {}
0d8affc1 M	213	thumbnails = traverse_obj(video, ('t', ..., {'url': 'i', 'width': 'w', 'height': 'h'}))
	214	if not thumbnails and video.get('i'):
	215	thumbnails = [{'url': video['i']}]
	216
	217	if live_status in {'is_live', 'post_live'}:
	218	duration = None
	219	else:
	220	duration = int_or_none(video.get('duration'))
70c5802b	221
	222	return {
	223	'id': video_id,
0d8affc1	224	'title': unescapeHTML(video.get('title')),
70c5802b	225	'formats': formats,
92922fe7	226	'subtitles': subtitles,
0d8affc1	227	'thumbnails': thumbnails,
70c5802b	228	'timestamp': parse_iso8601(video.get('pubDate')),
	229	'channel': author.get('name'),
	230	'channel_url': author.get('url'),
0d8affc1	231	'duration': duration,
64fa820c	232	'uploader': author.get('name'),
0d8affc1	233	'live_status': live_status,
70c5802b	234	}
f1d42a83 AG	235
f1d42a83 AG	236
839e2a62 M	237	class RumbleIE(InfoExtractor):
839e2a62 M	238	_VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$'
23d829a3 TL	239	_EMBED_REGEX = [
	240	r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>',
	241	r'<a[^>]+class="videostream__link link"[^>]+href=(?P<url>/v[\w.-]+\.html)[^>]*>']
839e2a62 M	242	_TESTS = [{
	243	'add_ie': ['RumbleEmbed'],
	244	'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
	245	'md5': '53af34098a7f92c4e51cf0bd1c33f009',
	246	'info_dict': {
	247	'id': 'vb0ofn',
	248	'ext': 'mp4',
	249	'timestamp': 1612662578,
	250	'uploader': 'LovingMontana',
	251	'channel': 'LovingMontana',
	252	'upload_date': '20210207',
	253	'title': 'Winter-loving dog helps girls dig a snow fort ',
	254	'description': 'Moose the dog is more than happy to help with digging out this epic snow fort. Great job, Moose!',
	255	'channel_url': 'https://rumble.com/c/c-546523',
	256	'thumbnail': r're:https://.+\.jpg',
	257	'duration': 103,
	258	'like_count': int,
23d829a3	259	'dislike_count': int,
839e2a62 M	260	'view_count': int,
	261	'live_status': 'not_live',
	262	}
	263	}, {
	264	'url': 'http://www.rumble.com/vDMUM1?key=value',
	265	'only_matching': True,
78bc1868	266	}, {
	267	'note': 'timeline format',
	268	'url': 'https://rumble.com/v2ea9qb-the-u.s.-cannot-hide-this-in-ukraine-anymore-redacted-with-natali-and-clayt.html',
	269	'md5': '40d61fec6c0945bca3d0e1dc1aa53d79',
	270	'params': {'format': 'wv'},
	271	'info_dict': {
	272	'id': 'v2bou5f',
	273	'ext': 'mp4',
	274	'uploader': 'Redacted News',
	275	'upload_date': '20230322',
	276	'timestamp': 1679445010,
	277	'title': 'The U.S. CANNOT hide this in Ukraine anymore \| Redacted with Natali and Clayton Morris',
	278	'duration': 892,
	279	'channel': 'Redacted News',
	280	'description': 'md5:aaad0c5c3426d7a361c29bdaaced7c42',
	281	'channel_url': 'https://rumble.com/c/Redacted',
	282	'live_status': 'not_live',
	283	'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg',
23d829a3 TL	284	'like_count': int,
	285	'dislike_count': int,
	286	'view_count': int,
78bc1868	287	},
6994afc0	288	}, {
	289	'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html',
	290	'info_dict': {
	291	'id': 'v2blzyy',
	292	'ext': 'mp4',
	293	'live_status': 'was_live',
	294	'release_timestamp': 1679446804,
	295	'description': 'md5:2ac4908ccfecfb921f8ffa4b30c1e636',
	296	'release_date': '20230322',
	297	'timestamp': 1679445692,
	298	'duration': 4435,
	299	'upload_date': '20230322',
	300	'title': 'The Covid Twitter Files Drop: Protecting Fauci While Censoring The Truth w/Matt Taibbi',
	301	'uploader': 'Kim Iversen',
	302	'channel_url': 'https://rumble.com/c/KimIversen',
	303	'channel': 'Kim Iversen',
	304	'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg',
23d829a3 TL	305	'like_count': int,
	306	'dislike_count': int,
	307	'view_count': int,
6994afc0	308	},
839e2a62 M	309	}]
	310
	311	_WEBPAGE_TESTS = [{
	312	'url': 'https://rumble.com/videos?page=2',
23d829a3	313	'playlist_mincount': 24,
839e2a62 M	314	'info_dict': {
	315	'id': 'videos?page=2',
	316	'title': 'All videos',
	317	'description': 'Browse videos uploaded to Rumble.com',
	318	'age_limit': 0,
	319	},
	320	}, {
23d829a3 TL	321	'url': 'https://rumble.com/browse/live',
23d829a3 TL	322	'playlist_mincount': 25,
839e2a62	323	'info_dict': {
23d829a3 TL	324	'id': 'live',
23d829a3 TL	325	'title': 'Browse',
839e2a62 M	326	'age_limit': 0,
	327	},
	328	}, {
	329	'url': 'https://rumble.com/search/video?q=rumble&sort=views',
23d829a3	330	'playlist_mincount': 24,
839e2a62 M	331	'info_dict': {
	332	'id': 'video?q=rumble&sort=views',
	333	'title': 'Search results for: rumble',
	334	'age_limit': 0,
	335	},
	336	}]
	337
	338	def _real_extract(self, url):
	339	page_id = self._match_id(url)
	340	webpage = self._download_webpage(url, page_id)
	341	url_info = next(RumbleEmbedIE.extract_from_webpage(self._downloader, url, webpage), None)
	342	if not url_info:
	343	raise UnsupportedError(url)
	344
23d829a3 TL	345	return {
	346	'_type': 'url_transparent',
	347	'ie_key': url_info['ie_key'],
	348	'url': url_info['url'],
	349	'release_timestamp': parse_iso8601(self._search_regex(
	350	r'(?:Livestream begins\|Streamed on):\s+<time datetime="([^"]+)', webpage, 'release date', default=None)),
	351	'view_count': int_or_none(self._search_regex(
	352	r'"userInteractionCount"\s:\s(\d+)', webpage, 'view count', default=None)),
	353	'like_count': parse_count(self._search_regex(
	354	r'<span data-js="rumbles_up_votes">\s*([\d,.KM]+)', webpage, 'like count', default=None)),
	355	'dislike_count': parse_count(self._search_regex(
	356	r'<span data-js="rumbles_down_votes">\s*([\d,.KM]+)', webpage, 'dislike count', default=None)),
	357	'description': clean_html(get_element_by_class('media-description', webpage))
	358	}
839e2a62 M	359
839e2a62 M	360
f1d42a83 AG	361	class RumbleChannelIE(InfoExtractor):
	362	_VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c\|user)/(?P<id>[^&?#$/]+))'
	363
	364	_TESTS = [{
	365	'url': 'https://rumble.com/c/Styxhexenhammer666',
	366	'playlist_mincount': 1160,
	367	'info_dict': {
	368	'id': 'Styxhexenhammer666',
	369	},
	370	}, {
	371	'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
0d8affc1	372	'playlist_mincount': 4,
f1d42a83 AG	373	'info_dict': {
	374	'id': 'goldenpoodleharleyeuna',
	375	},
	376	}]
	377
	378	def entries(self, url, playlist_id):
	379	for page in itertools.count(1):
	380	try:
	381	webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note='Downloading page %d' % page)
	382	except ExtractorError as e:
3d2623a8	383	if isinstance(e.cause, HTTPError) and e.cause.status == 404:
f1d42a83 AG	384	break
	385	raise
	386	for video_url in re.findall(r'class=video-item--a\s?href=([^>]+\.html)', webpage):
	387	yield self.url_result('https://rumble.com' + video_url)
	388
	389	def _real_extract(self, url):
	390	url, playlist_id = self._match_valid_url(url).groups()
	391	return self.playlist_result(self.entries(url, playlist_id), playlist_id=playlist_id)