jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/rumble.py
Completely change project name to yt-dlp (#85)
[yt-dlp.git] / yt_dlp / extractor / rumble.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9 determine_ext,
10 int_or_none,
11 parse_iso8601,
12 try_get,
13 )
14
15
class RumbleEmbedIE(InfoExtractor):
    """Extractor for rumble.com ``/embed/`` player URLs."""

    # The optional "<prefix>." segment before the id is matched but not captured.
    _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
    _TESTS = [{
        'url': 'https://rumble.com/embed/v5pv5f',
        'md5': '36a18a049856720189f30977ccbb2c34',
        'info_dict': {
            'id': 'v5pv5f',
            'ext': 'mp4',
            'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
            'timestamp': 1571611968,
            'upload_date': '20191020',
        }
    }, {
        'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every Rumble embed URL referenced in *webpage*.

        A URL is picked up when it appears as the quoted ``src`` of a
        ``<script>`` or ``<iframe>`` tag, or as the quoted value of an
        ``embedUrl`` key.
        """
        embed_re = (
            r'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>%s)'
            % RumbleEmbedIE._VALID_URL)
        return [match.group('url') for match in re.finditer(embed_re, webpage)]

    def _real_extract(self, url):
        """Fetch the embedJS metadata for *url* and build the info dict."""
        video_id = self._match_id(url)
        video = self._download_json(
            'https://rumble.com/embedJS/', video_id,
            query={'request': 'video', 'v': video_id})
        # The title is mandatory: a missing key raises before any format work.
        title = video['title']

        formats = []
        # 'ua' is iterated as a mapping of height -> sequence; slots 0 and 1
        # are read as URLs and slots 2 and 3 as the matching metadata dicts.
        for height, sources in (video.get('ua') or {}).items():
            for slot in (0, 1):
                media_url = try_get(sources, lambda x: x[slot], compat_str)
                if not media_url:
                    continue
                ext = determine_ext(media_url)
                fmt = {
                    'ext': ext,
                    'format_id': '%s-%sp' % (ext, height),
                    'height': int_or_none(height),
                    'url': media_url,
                }
                bitrate = try_get(sources, lambda x: x[slot + 2]['bitrate'])
                if bitrate:
                    fmt['tbr'] = int_or_none(bitrate)
                formats.append(fmt)
        self._sort_formats(formats)

        # Fall back to an empty dict so the channel fields resolve to None.
        author = video.get('author') or {}

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': video.get('i'),
            'timestamp': parse_iso8601(video.get('pubDate')),
            'channel': author.get('name'),
            'channel_url': author.get('url'),
            'duration': int_or_none(video.get('duration')),
        }