4 from .common
import InfoExtractor
5 from ..compat
import compat_str
, compat_HTTPError
# Extractor for Rumble embedded-player URLs (rumble.com/embed/...).
# NOTE(review): the numbers leading some lines appear to be original source
# line numbers and they skip, so parts of this class are not visible here;
# the comments below describe only what the visible lines show.
16 class RumbleEmbedIE(InfoExtractor
):
# Embed URL pattern: an optional "<segment>." may precede the captured `id`
# (lowercase letters and digits only).
17 _VALID_URL
= r
'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
# Page-scan pattern: a <script>/<iframe> `src=` attribute, or a quoted
# `embedUrl` key followed by a colon, then a quoted URL that matches
# _VALID_URL (captured as group `url`).
18 _EMBED_REGEX
= [fr
'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl
["\']\s*:\s*)["\'](?P
<url
>{_VALID_URL}
)']
# Test fixture fragment for embed id v5pv5f (expected md5 and metadata).
# NOTE(review): the enclosing list/dict lines are not visible in this view.
20 'url
': 'https
://rumble
.com
/embed
/v5pv5f
',
21 'md5
': '36a18a049856720189f30977ccbb2c34
',
25 'title
': 'WMAR
2 News Latest Headlines | October
20, 6pm
',
26 'timestamp
': 1571611968,
27 'upload_date
': '20191020',
28 'channel_url
': 'https
://rumble
.com
/c
/WMAR
',
30 'thumbnail
': 'https
://sp
.rmbl
.ws
/s8
/1/5/M
/z
/1/5Mz1a
.OvCc
-small
-WMAR
-2-News
-Latest
-Headline
.jpg
',
# Test fixture fragment for embed id vslb7v (expected md5 and metadata).
35 'url
': 'https
://rumble
.com
/embed
/vslb7v
',
36 'md5
': '7418035de1a30a178b8af34dc2b6a52b
',
40 'title
': 'Defense Sec
. says US Commitment to NATO Defense
\'Ironclad
\'',
41 'timestamp
': 1645142135,
42 'upload_date
': '20220217',
43 'channel_url
': 'https
://rumble
.com
/c
/CyberTechNews
',
45 'thumbnail
': 'https
://sp
.rmbl
.ws
/s8
/6/7/i
/9/h
/7i9hd
.OvCc
.jpg
',
# Matching-only fixture for the "<segment>.<id>" form of the embed URL.
50 'url
': 'https
://rumble
.com
/embed
/ufe9n
.v5pv5f
',
51 'only_matching
': True,
# Collect Rumble embed URLs from `webpage`.
# The visible lines do two things: gather the parent implementation's results
# into the tuple `embeds`, and build https://rumble.com/embed/<id> URLs from
# inline <script> blocks that call Rumble("play", {"video": "<id>"}).
# NOTE(review): the lines between these two statements are not visible in this
# view, so how `embeds` is used cannot be confirmed from here.
55 def _extract_embed_urls(cls, url, webpage):
56 embeds = tuple(super()._extract_embed_urls(url, webpage))
# One embed URL per regex match; the `id` group is the quoted value of the
# `video` key (lowercase letters and digits only).
59 return [f'https
://rumble
.com
/embed
/{mobj.group("id")}
' for mobj in re.finditer(
60 r'<script
>\s
*Rumble\
(\s
*"play"\s
*,\s
*{\s
*[\'"]video[\'"]\s
*:\s
*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
# Extract title, formats, subtitles and metadata for a single embed URL.
# NOTE(review): several original lines of this method are not visible in this
# view (e.g. the statement that defines `i` and the header of the returned
# dict); the comments below cover only what the visible lines show.
62 def _real_extract(self, url):
63 video_id = self._match_id(url)
# Fetch the JSON description from the embedJS endpoint with the query
# parameters request=video and v=<video_id>.
64 video = self._download_json(
65 'https
://rumble
.com
/embedJS
/', video_id,
66 query={'request': 'video', 'v': video_id})
# A missing 'title' key raises KeyError here (plain subscript, not .get()).
67 title = unescapeHTML(video['title
'])
# `ua` is iterated as a mapping of height -> per-height data; a missing or
# falsy `ua` is replaced by {} so the loop simply does not run.
70 for height, ua in (video.get('ua
') or {}).items():
# The candidate URL is read from ua[i]; presumably try_get yields None when
# the lookup fails or the value is not a compat_str — confirm against utils.
72 f_url = try_get(ua, lambda x: x[i], compat_str)
74 ext = determine_ext(f_url)
77 'format_id
': '%s-%sp
' % (ext, height),
78 'height
': int_or_none(height),
# The bitrate is looked up two positions after the URL, at ua[i + 2]['bitrate'].
81 bitrate = try_get(ua, lambda x: x[i + 2]['bitrate
'])
83 f['tbr
'] = int_or_none(bitrate)
85 self._sort_formats(formats)
# One single-item subtitle list per `cc` language whose info has a truthy
# 'path'; the track name falls back to '' when 'language' is missing/falsy.
89 'url
': sub_info['path
'],
90 'name
': sub_info.get('language
') or '',
91 }] for lang, sub_info in (video.get('cc
') or {}).items() if sub_info.get('path
')
# Author info feeds both `channel` and `uploader`; defaults to {} when absent.
94 author = video.get('author
') or {}
# Visible result fields: thumbnail comes from key 'i', timestamp from the
# ISO 8601 'pubDate' value, duration is coerced with int_or_none.
100 'subtitles
': subtitles,
101 'thumbnail
': video.get('i
'),
102 'timestamp
': parse_iso8601(video.get('pubDate
')),
103 'channel
': author.get('name
'),
104 'channel_url
': author.get('url
'),
105 'duration
': int_or_none(video.get('duration
')),
106 'uploader
': author.get('name
'),
# Playlist extractor for Rumble channel (/c/<name>) and user (/user/<name>)
# listing pages.
110 class RumbleChannelIE(InfoExtractor):
# Group `url` captures the whole listing URL; group `id` is the path segment
# after /c/ or /user/, ending before any of the characters & ? # $ /.
111 _VALID_URL = r'(?P
<url
>https?
://(?
:www\
.)?rumble\
.com
/(?
:c|user
)/(?P
<id>[^
&?
#$/]+))'
# Test fixture fragments for one channel URL and one user URL.
# NOTE(review): the surrounding list/dict lines are not visible in this view.
114 'url': 'https://rumble.com/c/Styxhexenhammer666',
115 'playlist_mincount': 1160,
117 'id': 'Styxhexenhammer666',
120 'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
123 'id': 'goldenpoodleharleyeuna',
# Generator yielding one url_result per video linked from a channel/user
# listing, requesting `{url}?page=N` for N = 1, 2, 3, ... in turn.
# NOTE(review): the `try:` line and the body of the 404 branch are not visible
# in this view; the HTTP 404 check on the download error suggests that is how
# the end of pagination is detected — confirm against the full source.
127 def entries(self
, url
, playlist_id
):
128 for page
in itertools
.count(1):
130 webpage
= self
._download
_webpage
(f
'{url}?page={page}', playlist_id
, note
='Downloading page %d' % page
)
131 except ExtractorError
as e
:
132 if isinstance(e
.cause
, compat_HTTPError
) and e
.cause
.code
== 404:
# Each match is the unquoted href of a `video-item--a` link ending in .html;
# it is concatenated onto the site origin, so it is expected to be a
# site-relative path.
135 for video_url
in re
.findall(r
'class=video-item--a\s?href=([^>]+\.html)', webpage
):
136 yield self
.url_result('https://rumble.com' + video_url
)
def _real_extract(self, url):
    """Return the playlist result for a channel or user listing URL.

    ``url`` is matched against the extractor's URL pattern; the two captured
    groups (the listing URL and the channel/user identifier) are passed to
    ``entries``, and the identifier is also used as the playlist id.
    """
    match = self._match_valid_url(url)
    listing_url, channel_id = match.groups()
    videos = self.entries(listing_url, channel_id)
    return self.playlist_result(videos, playlist_id=channel_id)