]>
Commit | Line | Data |
---|---|---|
c61473c1 | 1 | import functools |
b65e3b06 S |
2 | import re |
3 | ||
4 | from .common import InfoExtractor | |
3d2623a8 | 5 | from ..networking import HEADRequest |
6b688b89 | 6 | from ..utils import ( |
37fb591c | 7 | ExtractorError, |
c61473c1 | 8 | OnDemandPagedList, |
f72218c1 | 9 | clean_html, |
b1a1ec15 | 10 | extract_attributes, |
f72218c1 | 11 | get_element_by_class, |
efdc45a6 | 12 | get_element_by_id, |
b1a1ec15 | 13 | get_element_html_by_class, |
c61473c1 | 14 | get_elements_html_by_class, |
f72218c1 | 15 | int_or_none, |
6b688b89 | 16 | orderedSet, |
c61473c1 M |
17 | parse_count, |
18 | parse_duration, | |
f72218c1 | 19 | traverse_obj, |
6ddd4bf6 | 20 | unified_strdate, |
6b688b89 | 21 | urlencode_postdata, |
b1a1ec15 | 22 | urljoin, |
6b688b89 | 23 | ) |
b65e3b06 S |
24 | |
25 | ||
class BitChuteIE(InfoExtractor):
    """Extractor for individual BitChute videos (video, embed and torrent URLs)."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
        'md5': '7e427d7ed7af5a75b5855705ec750e2b',
        'info_dict': {
            'id': 'UGlrF9o9b-Q',
            'ext': 'mp4',
            'title': 'This is the first video on #BitChute !',
            'description': 'md5:a0337e7b1fe39e32336974af8173a034',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'BitChute',
            'upload_date': '20170103',
            'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
            'channel': 'BitChute',
            'channel_url': 'https://www.bitchute.com/channel/bitchute/'
        },
    }, {
        # test case: video with different channel and uploader
        'url': 'https://www.bitchute.com/video/Yti_j9A-UZ4/',
        'md5': 'f10e6a8e787766235946d0868703f1d0',
        'info_dict': {
            'id': 'Yti_j9A-UZ4',
            'ext': 'mp4',
            'title': 'Israel at War | Full Measure',
            'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'sharylattkisson',
            'upload_date': '20231106',
            'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
            'channel': 'Full Measure with Sharyl Attkisson',
            'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/'
        },
    }, {
        # video not downloadable in browser, but we can recover it
        'url': 'https://www.bitchute.com/video/2s6B3nZjAk7R/',
        'md5': '05c12397d5354bf24494885b08d24ed1',
        'info_dict': {
            'id': '2s6B3nZjAk7R',
            'ext': 'mp4',
            'filesize': 71537926,
            'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
            'description': 'md5:228ee93bd840a24938f536aeac9cf749',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'BitChute',
            'upload_date': '20181113',
            'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
            'channel': 'BitChute',
            'channel_url': 'https://www.bitchute.com/channel/bitchute/'
        },
        'params': {'check_formats': None},
    }, {
        # restricted video
        'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/',
        'info_dict': {
            'id': 'WEnQU7XGcTdl',
            'ext': 'mp4',
            'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
        },
        'params': {'skip_download': True},
        'skip': 'Georestricted in DE',
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,
    }, {
        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
        'only_matching': True,
    }]
    _GEO_BYPASS = False

    # Headers sent with both the watch-page download and the HEAD probes
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
        'Referer': 'https://www.bitchute.com/',
    }

    def _check_format(self, video_url, video_id):
        """Probe video_url and alternative seed hosts with HEAD requests.

        The leading ``seedNNN`` host label of video_url is substituted by each
        candidate host in turn; the first candidate (``\\g<2>``) reproduces the
        original host, so the unmodified URL is tried first.

        Returns a dict with the first ``url`` that answered and its ``filesize``
        (from the Content-Length header), or None when every probe failed.
        """
        # orderedSet is used to drop repeated candidates (e.g. when the URL
        # has no seed host and every substitution yields the same string)
        urls = orderedSet(
            re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
            for host in (r'\g<2>', 'seed122', 'seed125', 'seed126', 'seed128',
                         'seed132', 'seed150', 'seed151', 'seed152', 'seed153',
                         'seed167', 'seed171', 'seed177', 'seed305', 'seed307',
                         'seedp29xb', 'zb10-7gsop1v78'))
        for url in urls:
            try:
                response = self._request_webpage(
                    HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS)
            except ExtractorError as e:
                # A failing host is not fatal; move on to the next candidate
                self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}')
                continue
            return {
                'url': url,
                'filesize': int_or_none(response.headers.get('Content-Length'))
            }

    def _raise_if_restricted(self, webpage):
        """Raise a geo-restriction error if the page title reads '(Channel|Video) Restricted'."""
        page_title = clean_html(get_element_by_class('page-title', webpage)) or ''
        if re.fullmatch(r'(?:Channel|Video) Restricted', page_title):
            # Prefer the detailed explanation from the page, fall back to the title
            reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
            self.raise_geo_restricted(reason)

    @staticmethod
    def _make_url(html):
        """Join the href of the first 'spa'-class element in html onto the site root."""
        path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href')
        # path is None when no such element/href exists
        # NOTE(review): presumably urljoin then yields None - confirm
        return urljoin('https://www.bitchute.com', path)

    def _real_extract(self, url):
        """Download the watch page for url and build the info dict for the video."""
        video_id = self._match_id(url)
        # Always fetch the canonical /video/ page, whatever URL form was given
        webpage = self._download_webpage(
            f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)

        self._raise_if_restricted(webpage)
        publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
        entries = self._parse_html5_media_entries(url, webpage, video_id)

        formats = []
        for format_ in traverse_obj(entries, (0, 'formats', ...)):
            # Formats are verified unless check_formats is explicitly False
            if self.get_param('check_formats') is not False:
                # The URL is popped first so a failed check leaves the format
                # without one and it gets skipped below
                format_.update(self._check_format(format_.pop('url'), video_id) or {})
                if 'url' not in format_:
                    continue
            formats.append(format_)

        if not formats:
            self.raise_no_formats(
                'Video is unavailable. Please make sure this video is playable in the browser '
                'before reporting this issue.', expected=True, video_id=video_id)

        details = get_element_by_class('details', webpage) or ''
        uploader_html = get_element_html_by_class('creator', details) or ''
        channel_html = get_element_html_by_class('name', details) or ''

        return {
            'id': video_id,
            'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage),
            # Normalise a falsy result (e.g. the element was not found and the
            # *_html fallback is '') to None so the field is reported as absent
            'uploader': clean_html(uploader_html) or None,
            'uploader_url': self._make_url(uploader_html),
            'channel': clean_html(channel_html) or None,
            'channel_url': self._make_url(channel_html),
            'upload_date': unified_strdate(self._search_regex(
                r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
            'formats': formats,
        }
171 | ||
172 | ||
class BitChuteChannelIE(InfoExtractor):
    """Extractor for BitChute channel and playlist pages."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.bitchute.com/channel/bitchute/',
        'info_dict': {
            'id': 'bitchute',
            'title': 'BitChute',
            'description': 'md5:5329fb3866125afa9446835594a9b138',
        },
        'playlist': [
            {
                'md5': '7e427d7ed7af5a75b5855705ec750e2b',
                'info_dict': {
                    'id': 'UGlrF9o9b-Q',
                    'ext': 'mp4',
                    'title': 'This is the first video on #BitChute !',
                    'description': 'md5:a0337e7b1fe39e32336974af8173a034',
                    'thumbnail': r're:^https?://.*\.jpg$',
                    'uploader': 'BitChute',
                    'upload_date': '20170103',
                    'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
                    'channel': 'BitChute',
                    'channel_url': 'https://www.bitchute.com/channel/bitchute/',
                    'duration': 16,
                    'view_count': int,
                },
            }
        ],
        'params': {
            'skip_download': True,
            'playlist_items': '-1',
        },
    }, {
        'url': 'https://www.bitchute.com/playlist/wV9Imujxasw9/',
        'playlist_mincount': 20,
        'info_dict': {
            'id': 'wV9Imujxasw9',
            'title': 'Bruce MacDonald and "The Light of Darkness"',
            'description': 'md5:747724ef404eebdfc04277714f81863e',
        }
    }]

    # Value sent both as the csrfmiddlewaretoken form field and the csrftoken cookie
    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
    # Number of entries assumed per page; used to compute the request offset
    PAGE_SIZE = 25
    # HTML class names of the per-video container/title/description, keyed by URL type
    HTML_CLASS_NAMES = {
        'channel': {
            'container': 'channel-videos-container',
            'title': 'channel-videos-title',
            'description': 'channel-videos-text',
        },
        'playlist': {
            'container': 'playlist-video',
            'title': 'title',
            'description': 'description',
        }
    }

    @staticmethod
    def _make_url(playlist_id, playlist_type):
        """Return the canonical page URL for the given channel/playlist id."""
        return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'

    def _fetch_page(self, playlist_id, playlist_type, page_num):
        """Yield url_transparent results for the videos on page page_num.

        Posts to the ``extend/`` endpoint of the playlist URL with an offset of
        ``page_num * PAGE_SIZE``. Yields nothing when the response does not
        report success.
        """
        playlist_url = self._make_url(playlist_id, playlist_type)
        data = self._download_json(
            f'{playlist_url}extend/', playlist_id, f'Downloading page {page_num}',
            data=urlencode_postdata({
                'csrfmiddlewaretoken': self._TOKEN,
                'name': '',
                'offset': page_num * self.PAGE_SIZE,
            }), headers={
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': playlist_url,
                'X-Requested-With': 'XMLHttpRequest',
                'Cookie': f'csrftoken={self._TOKEN}',
            })
        if not data.get('success'):
            return
        classes = self.HTML_CLASS_NAMES[playlist_type]
        # Guard against a response without an 'html' key so the element
        # search always receives a string rather than None
        page_html = data.get('html') or ''
        for video_html in get_elements_html_by_class(classes['container'], page_html):
            video_id = self._search_regex(
                r'<a\s[^>]*\bhref=["\']/video/([^"\'/]+)', video_html, 'video id', default=None)
            if not video_id:
                # Container without a /video/ link; nothing to extract
                continue
            yield self.url_result(
                f'https://www.bitchute.com/video/{video_id}', BitChuteIE, video_id, url_transparent=True,
                title=clean_html(get_element_by_class(classes['title'], video_html)),
                description=clean_html(get_element_by_class(classes['description'], video_html)),
                duration=parse_duration(get_element_by_class('video-duration', video_html)),
                view_count=parse_count(clean_html(get_element_by_class('video-views', video_html))))

    def _real_extract(self, url):
        """Build a lazily paged playlist result for a channel or playlist URL."""
        playlist_type, playlist_id = self._match_valid_url(url).group('type', 'id')
        webpage = self._download_webpage(self._make_url(playlist_id, playlist_type), playlist_id)

        # Bind the id and type so the paged list only has to supply the page number
        page_func = functools.partial(self._fetch_page, playlist_id, playlist_type)
        return self.playlist_result(
            OnDemandPagedList(page_func, self.PAGE_SIZE), playlist_id,
            title=self._html_extract_title(webpage, default=None),
            description=self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage, default=None),
            playlist_count=int_or_none(self._html_search_regex(
                r'<span>(\d+)\s+videos?</span>', webpage, 'playlist count', default=None)))