]>
Commit | Line | Data |
---|---|---|
b65e3b06 S |
1 | import itertools |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
6b688b89 | 5 | from ..utils import ( |
37fb591c AH |
6 | ExtractorError, |
7 | GeoRestrictedError, | |
6b688b89 | 8 | orderedSet, |
6ddd4bf6 | 9 | unified_strdate, |
6b688b89 S |
10 | urlencode_postdata, |
11 | ) | |
b65e3b06 S |
12 | |
13 | ||
class BitChuteIE(InfoExtractor):
    """Extractor for single BitChute videos (video, embed and torrent URLs)."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
        'md5': '7e427d7ed7af5a75b5855705ec750e2b',
        'info_dict': {
            # Must equal the ``id`` group of the URL above, because
            # _real_extract() returns the matched id unchanged.
            'id': 'UGlrF9o9b-Q',
            'ext': 'mp4',
            'title': 'This is the first video on #BitChute !',
            'description': 'md5:a0337e7b1fe39e32336974af8173a034',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'BitChute',
            'upload_date': '20170103',
        },
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,
    }, {
        # NOTE(review): for this URL shape the matched id includes the
        # ``.webtorrent`` suffix -- confirm the /video/ page accepts it.
        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page for *url* and build its info dict.

        Returns a dict with ``id``, ``title``, ``description``,
        ``thumbnail``, ``uploader``, ``upload_date`` and ``formats``.

        Raises GeoRestrictedError when the page title reads
        'Video Unavailable', and ExtractorError when no media can be
        found for any other reason.
        """
        video_id = self._match_id(url)

        # Whatever URL flavour matched, always fetch the canonical /video/
        # page; an explicit desktop browser User-Agent is sent with it.
        webpage = self._download_webpage(
            f'https://www.bitchute.com/video/{video_id}', video_id, headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
            })

        # Title fallbacks, in order: the #video-title element or <title>,
        # then the "description" meta tag, then the OpenGraph description.
        title = self._html_search_regex(
            (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
            webpage, 'title', default=None) or self._html_search_meta(
            'description', webpage, 'title',
            default=None) or self._og_search_description(webpage)

        # Candidate media URLs: the quoted argument of addWebSeed(...) calls
        # first, then any http(s) URL passed through an ``as=`` parameter.
        format_urls = [
            mobj.group('url')
            for mobj in re.finditer(
                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
        format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))

        # orderedSet() is applied so each URL yields a single format entry.
        formats = [
            {'url': format_url}
            for format_url in orderedSet(format_urls)]

        if not formats:
            # Nothing found in the page scripts: fall back to HTML5 media tags.
            entries = self._parse_html5_media_entries(
                url, webpage, video_id)
            if not entries:
                # Surface the page's own heading as the error message.
                error = self._html_search_regex(r'<h1 class="page-title">([^<]+)</h1>', webpage, 'error', default='Cannot find video')
                if error == 'Video Unavailable':
                    raise GeoRestrictedError(error)
                raise ExtractorError(error)
            formats = entries[0]['formats']

        self._check_formats(formats, video_id)
        self._sort_formats(formats)

        description = self._html_search_regex(
            r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
            webpage, 'description', fatal=False)
        # Prefer the OpenGraph image, falling back to the Twitter card image.
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail')
        uploader = self._html_search_regex(
            (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
             r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
            webpage, 'uploader', fatal=False)

        # The publish line has the form "... at HH:MM UTC on <date>."; only
        # the date part is captured and handed to unified_strdate().
        upload_date = unified_strdate(self._search_regex(
            r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
            webpage, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
            'formats': formats,
        }
98 | ||
99 | ||
class BitChuteChannelIE(InfoExtractor):
    """Playlist extractor for BitChute channel pages."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.bitchute.com/channel/victoriaxrave/',
        'playlist_mincount': 185,
        'info_dict': {
            'id': 'victoriaxrave',
        },
    }

    # Sent twice with every listing request: as the ``csrfmiddlewaretoken``
    # form field and as the ``csrftoken`` cookie.
    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'

    def _entries(self, channel_id):
        """Yield one url_result per video, paging through the channel listing.

        Paging stops as soon as a response reports ``success`` as False,
        carries no ``html``, or its html contains no video links.
        """
        base_url = f'https://www.bitchute.com/channel/{channel_id}/'
        listing_url = f'{base_url}extend/'
        seen = 0  # number of videos already listed; sent as the next offset
        for page in itertools.count(1):
            form = urlencode_postdata({
                'csrfmiddlewaretoken': self._TOKEN,
                'name': '',
                'offset': seen,
            })
            response = self._download_json(
                listing_url, channel_id,
                f'Downloading channel page {page}',
                data=form, headers={
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                    'Referer': base_url,
                    'X-Requested-With': 'XMLHttpRequest',
                    'Cookie': f'csrftoken={self._TOKEN}',
                })

            # An explicit failure flag and a missing/empty payload both end
            # the listing.
            markup = None if response.get('success') is False else response.get('html')
            if not markup:
                return

            found = re.findall(
                r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
                markup)
            if not found:
                return

            seen += len(found)
            yield from (
                self.url_result(
                    f'https://www.bitchute.com/video/{vid}',
                    ie=BitChuteIE.ie_key(), video_id=vid)
                for vid in found)

    def _real_extract(self, url):
        """Return a playlist result for the channel addressed by *url*."""
        playlist_id = self._match_id(url)
        videos = self._entries(playlist_id)
        return self.playlist_result(videos, playlist_id=playlist_id)