]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/rumble.py
4 from .common
import InfoExtractor
5 from ..compat
import compat_str
, compat_HTTPError
16 class RumbleEmbedIE(InfoExtractor
):
17 _VALID_URL
= r
'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
19 'url': 'https://rumble.com/embed/v5pv5f',
20 'md5': '36a18a049856720189f30977ccbb2c34',
24 'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
25 'timestamp': 1571611968,
26 'upload_date': '20191020',
27 'channel_url': 'https://rumble.com/c/WMAR',
29 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg',
34 'url': 'https://rumble.com/embed/vslb7v',
35 'md5': '7418035de1a30a178b8af34dc2b6a52b',
39 'title': 'Defense Sec. says US Commitment to NATO Defense \'Ironclad\'',
40 'timestamp': 1645142135,
41 'upload_date': '20220217',
42 'channel_url': 'https://rumble.com/c/CyberTechNews',
44 'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg',
49 'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
50 'only_matching': True,
54 def _extract_urls(cls
, webpage
):
55 embeds
= tuple(re
.finditer(
56 fr
'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl
["\']\s*:\s*)["\'](?P
<url
>{cls._VALID_URL}
)', webpage))
58 return [mobj.group('url
') for mobj in embeds]
59 return [f'https
://rumble
.com
/embed
/{mobj.group("id")}
' for mobj in re.finditer(
60 r'<script
>\s
*Rumble\
(\s
*"play"\s
*,\s
*{\s
*[\'"]video[\'"]\s
*:\s
*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
62 def _real_extract(self, url):
63 video_id = self._match_id(url)
64 video = self._download_json(
65 'https
://rumble
.com
/embedJS
/', video_id,
66 query={'request': 'video', 'v': video_id})
67 title = unescapeHTML(video['title
'])
70 for height, ua in (video.get('ua
') or {}).items():
72 f_url = try_get(ua, lambda x: x[i], compat_str)
74 ext = determine_ext(f_url)
77 'format_id
': '%s-%sp
' % (ext, height),
78 'height
': int_or_none(height),
81 bitrate = try_get(ua, lambda x: x[i + 2]['bitrate
'])
83 f['tbr
'] = int_or_none(bitrate)
85 self._sort_formats(formats)
89 'url
': sub_info['path
'],
90 'name
': sub_info.get('language
') or '',
91 }] for lang, sub_info in (video.get('cc
') or {}).items() if sub_info.get('path
')
94 author = video.get('author
') or {}
100 'subtitles
': subtitles,
101 'thumbnail
': video.get('i
'),
102 'timestamp
': parse_iso8601(video.get('pubDate
')),
103 'channel
': author.get('name
'),
104 'channel_url
': author.get('url
'),
105 'duration
': int_or_none(video.get('duration
')),
106 'uploader
': author.get('name
'),
110 class RumbleChannelIE(InfoExtractor):
111 _VALID_URL = r'(?P
<url
>https?
://(?
:www\
.)?rumble\
.com
/(?
:c|user
)/(?P
<id>[^
&?
#$/]+))'
114 'url': 'https://rumble.com/c/Styxhexenhammer666',
115 'playlist_mincount': 1160,
117 'id': 'Styxhexenhammer666',
120 'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
123 'id': 'goldenpoodleharleyeuna',
127 def entries(self
, url
, playlist_id
):
128 for page
in itertools
.count(1):
130 webpage
= self
._download
_webpage
(f
'{url}?page={page}', playlist_id
, note
='Downloading page %d' % page
)
131 except ExtractorError
as e
:
132 if isinstance(e
.cause
, compat_HTTPError
) and e
.cause
.code
== 404:
135 for video_url
in re
.findall(r
'class=video-item--a\s?href=([^>]+\.html)', webpage
):
136 yield self
.url_result('https://rumble.com' + video_url
)
138 def _real_extract(self
, url
):
139 url
, playlist_id
= self
._match
_valid
_url
(url
).groups()
140 return self
.playlist_result(self
.entries(url
, playlist_id
), playlist_id
=playlist_id
)