]>
Commit | Line | Data |
---|---|---|
1 | import functools | |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..networking import HEADRequest | |
6 | from ..utils import ( | |
7 | ExtractorError, | |
8 | OnDemandPagedList, | |
9 | clean_html, | |
10 | extract_attributes, | |
11 | get_element_by_class, | |
12 | get_element_by_id, | |
13 | get_element_html_by_class, | |
14 | get_elements_html_by_class, | |
15 | int_or_none, | |
16 | orderedSet, | |
17 | parse_count, | |
18 | parse_duration, | |
19 | traverse_obj, | |
20 | unified_strdate, | |
21 | urlencode_postdata, | |
22 | urljoin, | |
23 | ) | |
24 | ||
25 | ||
class BitChuteIE(InfoExtractor):
    """Extractor for a single BitChute video (``/video/``, ``/embed/`` and ``/torrent/`` URLs)."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
        'md5': '7e427d7ed7af5a75b5855705ec750e2b',
        'info_dict': {
            'id': 'UGlrF9o9b-Q',
            'ext': 'mp4',
            'title': 'This is the first video on #BitChute !',
            'description': 'md5:a0337e7b1fe39e32336974af8173a034',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'BitChute',
            'upload_date': '20170103',
            'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
            'channel': 'BitChute',
            'channel_url': 'https://www.bitchute.com/channel/bitchute/',
        },
    }, {
        # test case: video with different channel and uploader
        'url': 'https://www.bitchute.com/video/Yti_j9A-UZ4/',
        'md5': 'f10e6a8e787766235946d0868703f1d0',
        'info_dict': {
            'id': 'Yti_j9A-UZ4',
            'ext': 'mp4',
            'title': 'Israel at War | Full Measure',
            'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'sharylattkisson',
            'upload_date': '20231106',
            'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
            'channel': 'Full Measure with Sharyl Attkisson',
            'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
        },
    }, {
        # video not downloadable in browser, but we can recover it
        'url': 'https://www.bitchute.com/video/2s6B3nZjAk7R/',
        'md5': '05c12397d5354bf24494885b08d24ed1',
        'info_dict': {
            'id': '2s6B3nZjAk7R',
            'ext': 'mp4',
            'filesize': 71537926,
            'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
            'description': 'md5:228ee93bd840a24938f536aeac9cf749',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'BitChute',
            'upload_date': '20181113',
            'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
            'channel': 'BitChute',
            'channel_url': 'https://www.bitchute.com/channel/bitchute/',
        },
        'params': {'check_formats': None},
    }, {
        # restricted video
        'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/',
        'info_dict': {
            'id': 'WEnQU7XGcTdl',
            'ext': 'mp4',
            'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
        },
        'params': {'skip_download': True},
        'skip': 'Georestricted in DE',
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,
    }, {
        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
        'only_matching': True,
    }]
    _GEO_BYPASS = False

    # Headers sent with the video page download and with every HEAD probe
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
        'Referer': 'https://www.bitchute.com/',
    }

    def _check_format(self, video_url, video_id):
        """Probe ``video_url`` and alternative seed hosts with HEAD requests.

        The original host is tried first, followed by a fixed list of other
        hosts substituted into the URL. Returns a dict with the first URL that
        answered and its ``filesize`` (from ``Content-Length``), or ``None``
        when every candidate failed.
        """
        seed_host_re = r'(^https?://)(seed\d+)(?=\.bitchute\.com)'
        fallback_hosts = (
            'seed122', 'seed125', 'seed126', 'seed128',
            'seed132', 'seed150', 'seed151', 'seed152', 'seed153',
            'seed167', 'seed171', 'seed177', 'seed305', 'seed307',
            'seedp29xb', 'zb10-7gsop1v78',
        )
        # The leading r'\g<2>' re-inserts the host already in the URL, so the
        # unmodified URL is the first candidate.
        candidates = orderedSet(
            re.sub(seed_host_re, fr'\g<1>{host}', video_url)
            for host in (r'\g<2>', *fallback_hosts))

        for candidate in candidates:
            try:
                head = self._request_webpage(
                    HEADRequest(candidate), video_id=video_id,
                    note=f'Checking {candidate}', headers=self._HEADERS)
            except ExtractorError as err:
                self.to_screen(f'{video_id}: URL is invalid, skipping: {err.cause}')
            else:
                return {
                    'url': candidate,
                    'filesize': int_or_none(head.headers.get('Content-Length')),
                }
        return None

    def _raise_if_restricted(self, webpage):
        """Raise a geo-restriction error if the page title marks the video/channel as restricted."""
        heading = clean_html(get_element_by_class('page-title', webpage)) or ''
        if not re.fullmatch(r'(?:Channel|Video) Restricted', heading):
            return
        # Prefer the detailed explanation from the page; fall back to the title itself
        detail = clean_html(get_element_by_id('page-detail', webpage))
        self.raise_geo_restricted(detail or heading)

    @staticmethod
    def _make_url(html):
        """Resolve the ``href`` of the ``spa``-classed element in ``html`` against the site root."""
        anchor_html = get_element_html_by_class('spa', html) or ''
        href = extract_attributes(anchor_html).get('href')
        return urljoin('https://www.bitchute.com', href)

    def _real_extract(self, url):
        """Download the video page and build the info dict for the video in ``url``."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)

        self._raise_if_restricted(webpage)
        publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
        media_entries = self._parse_html5_media_entries(url, webpage, video_id)

        formats = []
        for fmt in traverse_obj(media_entries, (0, 'formats', ...)):
            if self.get_param('check_formats') is False:
                # Checking explicitly disabled: keep the format as parsed
                formats.append(fmt)
                continue
            # Replace the parsed URL with the first one that actually responds;
            # if none does, the format ends up without a URL and is dropped.
            fmt.update(self._check_format(fmt.pop('url'), video_id) or {})
            if 'url' in fmt:
                formats.append(fmt)

        if not formats:
            self.raise_no_formats(
                'Video is unavailable. Please make sure this video is playable in the browser '
                'before reporting this issue.', expected=True, video_id=video_id)

        details_html = get_element_by_class('details', webpage) or ''
        uploader_html = get_element_html_by_class('creator', details_html) or ''
        channel_html = get_element_html_by_class('name', details_html) or ''

        title = self._html_extract_title(webpage) or self._og_search_title(webpage)
        description = self._og_search_description(webpage, default=None)
        thumbnail = self._og_search_thumbnail(webpage)
        upload_date = unified_strdate(self._search_regex(
            r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': clean_html(uploader_html),
            'uploader_url': self._make_url(uploader_html),
            'channel': clean_html(channel_html),
            'channel_url': self._make_url(channel_html),
            'upload_date': upload_date,
            'formats': formats,
        }
171 | ||
172 | ||
class BitChuteChannelIE(InfoExtractor):
    """Extractor for BitChute channel and playlist pages.

    Videos are listed lazily, one page at a time, via the page's ``extend/``
    endpoint; each entry is delegated to ``BitChuteIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.bitchute.com/channel/bitchute/',
        'info_dict': {
            'id': 'bitchute',
            'title': 'BitChute',
            'description': 'md5:5329fb3866125afa9446835594a9b138',
        },
        'playlist': [
            {
                'md5': '7e427d7ed7af5a75b5855705ec750e2b',
                'info_dict': {
                    'id': 'UGlrF9o9b-Q',
                    'ext': 'mp4',
                    'title': 'This is the first video on #BitChute !',
                    'description': 'md5:a0337e7b1fe39e32336974af8173a034',
                    'thumbnail': r're:^https?://.*\.jpg$',
                    'uploader': 'BitChute',
                    'upload_date': '20170103',
                    'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
                    'channel': 'BitChute',
                    'channel_url': 'https://www.bitchute.com/channel/bitchute/',
                    'duration': 16,
                    'view_count': int,
                },
            }
        ],
        'params': {
            'skip_download': True,
            'playlist_items': '-1',
        },
    }, {
        'url': 'https://www.bitchute.com/playlist/wV9Imujxasw9/',
        'playlist_mincount': 20,
        'info_dict': {
            'id': 'wV9Imujxasw9',
            'title': 'Bruce MacDonald and "The Light of Darkness"',
            'description': 'md5:747724ef404eebdfc04277714f81863e',
        }
    }]

    # Sent both as the ``csrfmiddlewaretoken`` form field and as the
    # ``csrftoken`` cookie on every ``extend/`` request.
    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
    # Used as the offset step between requests and as the page size given to
    # OnDemandPagedList.
    PAGE_SIZE = 25
    # HTML class names of the per-video container, title and description
    # elements, keyed by the URL type ('channel' or 'playlist').
    HTML_CLASS_NAMES = {
        'channel': {
            'container': 'channel-videos-container',
            'title': 'channel-videos-title',
            'description': 'channel-videos-text',
        },
        'playlist': {
            'container': 'playlist-video',
            'title': 'title',
            'description': 'description',
        }

    }

    @staticmethod
    def _make_url(playlist_id, playlist_type):
        """Return the canonical page URL (with trailing slash) for a channel or playlist."""
        return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'

    def _fetch_page(self, playlist_id, playlist_type, page_num):
        """Yield url_transparent results for the videos on page ``page_num``.

        Posts to ``<playlist url>extend/`` with an offset of
        ``page_num * PAGE_SIZE``. Yields nothing when the reply is not flagged
        as successful or carries no HTML.
        """
        playlist_url = self._make_url(playlist_id, playlist_type)
        data = self._download_json(
            f'{playlist_url}extend/', playlist_id, f'Downloading page {page_num}',
            data=urlencode_postdata({
                'csrfmiddlewaretoken': self._TOKEN,
                'name': '',
                'offset': page_num * self.PAGE_SIZE,
            }), headers={
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': playlist_url,
                'X-Requested-With': 'XMLHttpRequest',
                'Cookie': f'csrftoken={self._TOKEN}',
            })
        if not data.get('success'):
            return
        # A reply flagged as successful may still lack the 'html' field (or
        # carry null); treat that as an empty page instead of handing None to
        # the regex-based HTML helpers, which would raise TypeError.
        page_html = data.get('html') or ''
        classes = self.HTML_CLASS_NAMES[playlist_type]
        for video_html in get_elements_html_by_class(classes['container'], page_html):
            video_id = self._search_regex(
                r'<a\s[^>]*\bhref=["\']/video/([^"\'/]+)', video_html, 'video id', default=None)
            if not video_id:
                # Container without a video link: nothing to extract
                continue
            yield self.url_result(
                f'https://www.bitchute.com/video/{video_id}', BitChuteIE, video_id, url_transparent=True,
                title=clean_html(get_element_by_class(classes['title'], video_html)),
                description=clean_html(get_element_by_class(classes['description'], video_html)),
                duration=parse_duration(get_element_by_class('video-duration', video_html)),
                view_count=parse_count(clean_html(get_element_by_class('video-views', video_html))))

    def _real_extract(self, url):
        """Build a lazily paged playlist result for the channel or playlist in ``url``."""
        playlist_type, playlist_id = self._match_valid_url(url).group('type', 'id')
        webpage = self._download_webpage(self._make_url(playlist_id, playlist_type), playlist_id)

        # Bind the playlist identity so the paged list only has to supply the page number
        page_func = functools.partial(self._fetch_page, playlist_id, playlist_type)
        return self.playlist_result(
            OnDemandPagedList(page_func, self.PAGE_SIZE), playlist_id,
            title=self._html_extract_title(webpage, default=None),
            description=self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage, default=None),
            playlist_count=int_or_none(self._html_search_regex(
                r'<span>(\d+)\s+videos?</span>', webpage, 'playlist count', default=None)))