]>
Commit | Line | Data |
---|---|---|
c61473c1 | 1 | import functools |
b65e3b06 S |
2 | import re |
3 | ||
4 | from .common import InfoExtractor | |
6b688b89 | 5 | from ..utils import ( |
37fb591c | 6 | ExtractorError, |
f72218c1 | 7 | HEADRequest, |
c61473c1 | 8 | OnDemandPagedList, |
f72218c1 | 9 | clean_html, |
10 | get_element_by_class, | |
efdc45a6 | 11 | get_element_by_id, |
c61473c1 | 12 | get_elements_html_by_class, |
f72218c1 | 13 | int_or_none, |
6b688b89 | 14 | orderedSet, |
c61473c1 M |
15 | parse_count, |
16 | parse_duration, | |
f72218c1 | 17 | traverse_obj, |
6ddd4bf6 | 18 | unified_strdate, |
6b688b89 S |
19 | urlencode_postdata, |
20 | ) | |
b65e3b06 S |
21 | |
22 | ||
class BitChuteIE(InfoExtractor):
    """Extractor for single BitChute video, embed and torrent URLs."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
        'md5': '7e427d7ed7af5a75b5855705ec750e2b',
        'info_dict': {
            'id': 'UGlrF9o9b-Q',
            'ext': 'mp4',
            'title': 'This is the first video on #BitChute !',
            'description': 'md5:a0337e7b1fe39e32336974af8173a034',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'BitChute',
            'upload_date': '20170103',
        },
    }, {
        # video not downloadable in browser, but we can recover it
        'url': 'https://www.bitchute.com/video/2s6B3nZjAk7R/',
        'md5': '05c12397d5354bf24494885b08d24ed1',
        'info_dict': {
            'id': '2s6B3nZjAk7R',
            'ext': 'mp4',
            'filesize': 71537926,
            'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
            'description': 'md5:228ee93bd840a24938f536aeac9cf749',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'BitChute',
            'upload_date': '20181113',
        },
        'params': {'check_formats': None},
    }, {
        # restricted video
        'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/',
        'info_dict': {
            'id': 'WEnQU7XGcTdl',
            'ext': 'mp4',
            'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
        },
        'params': {'skip_download': True},
        'skip': 'Georestricted in DE',
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,
    }, {
        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
        'only_matching': True,
    }]
    _GEO_BYPASS = False

    # Headers sent with the video page download and with every HEAD probe.
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
        'Referer': 'https://www.bitchute.com/',
    }

    def _check_format(self, video_url, video_id):
        """Find a reachable variant of ``video_url`` using HEAD requests.

        The URL is tried first with its own ``seedNNN`` host and then with
        each of the fallback seed hosts; duplicates are dropped while keeping
        that order.

        Returns a dict with the working ``url`` and its ``filesize`` (taken
        from the ``Content-Length`` response header), or ``None`` when every
        candidate request fails.
        """
        # r'\g<2>' re-inserts the host that is already present in the URL.
        seed_hosts = (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153')
        candidates = orderedSet(
            re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
            for host in seed_hosts)

        for candidate in candidates:
            try:
                head_response = self._request_webpage(
                    HEADRequest(candidate), video_id=video_id,
                    note=f'Checking {candidate}', headers=self._HEADERS)
            except ExtractorError as err:
                self.to_screen(f'{video_id}: URL is invalid, skipping: {err.cause}')
            else:
                return {
                    'url': candidate,
                    'filesize': int_or_none(head_response.headers.get('Content-Length')),
                }
        return None

    def _raise_if_restricted(self, webpage):
        """Raise a geo-restriction error if ``webpage`` is a restriction notice.

        A page counts as restricted when its ``page-title`` element reads
        exactly "Channel Restricted" or "Video Restricted". The text of the
        ``page-detail`` element is used as the reason, falling back to the
        page title itself.
        """
        heading = clean_html(get_element_by_class('page-title', webpage)) or ''
        if not re.fullmatch(r'(?:Channel|Video) Restricted', heading):
            return
        detail = clean_html(get_element_by_id('page-detail', webpage)) or heading
        self.raise_geo_restricted(detail)

    def _real_extract(self, url):
        """Download the canonical video page and build the info dict."""
        video_id = self._match_id(url)
        # Whatever URL form matched, the /video/ page is always the one fetched.
        webpage = self._download_webpage(
            f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)

        self._raise_if_restricted(webpage)
        publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
        entries = self._parse_html5_media_entries(url, webpage, video_id)

        formats = []
        for fmt in traverse_obj(entries, (0, 'formats', ...)):
            if self.get_param('check_formats') is False:
                # Checking explicitly disabled: accept the format untouched.
                formats.append(fmt)
                continue
            # Swap the advertised URL for a verified one; a failed check
            # leaves the format without a URL, so it is dropped.
            fmt.update(self._check_format(fmt.pop('url'), video_id) or {})
            if 'url' in fmt:
                formats.append(fmt)

        if not formats:
            self.raise_no_formats(
                'Video is unavailable. Please make sure this video is playable in the browser '
                'before reporting this issue.', expected=True, video_id=video_id)

        # Metadata is gathered in the same order as the keys of the result.
        title = self._html_extract_title(webpage) or self._og_search_title(webpage)
        description = self._og_search_description(webpage, default=None)
        thumbnail = self._og_search_thumbnail(webpage)
        uploader = clean_html(get_element_by_class('owner', webpage))
        upload_date = unified_strdate(self._search_regex(
            r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
            'formats': formats,
        }
131 | ||
132 | ||
class BitChuteChannelIE(InfoExtractor):
    """Extractor for BitChute channel and playlist listings."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.bitchute.com/channel/bitchute/',
        'info_dict': {
            'id': 'bitchute',
            'title': 'BitChute',
            'description': 'md5:5329fb3866125afa9446835594a9b138',
        },
        'playlist': [
            {
                'md5': '7e427d7ed7af5a75b5855705ec750e2b',
                'info_dict': {
                    'id': 'UGlrF9o9b-Q',
                    'ext': 'mp4',
                    'filesize': None,
                    'title': 'This is the first video on #BitChute !',
                    'description': 'md5:a0337e7b1fe39e32336974af8173a034',
                    'thumbnail': r're:^https?://.*\.jpg$',
                    'uploader': 'BitChute',
                    'upload_date': '20170103',
                    'duration': 16,
                    'view_count': int,
                },
            }
        ],
        'params': {
            'skip_download': True,
            'playlist_items': '-1',
        },
    }, {
        'url': 'https://www.bitchute.com/playlist/wV9Imujxasw9/',
        'playlist_mincount': 20,
        'info_dict': {
            'id': 'wV9Imujxasw9',
            'title': 'Bruce MacDonald and "The Light of Darkness"',
            'description': 'md5:04913227d2714af1d36d804aa2ab6b1e',
        }
    }]

    # Sent both as the `csrfmiddlewaretoken` form field and the `csrftoken` cookie.
    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
    # Page size given to the paged list; also the multiplier for the request offset.
    PAGE_SIZE = 25
    # HTML class names used to pick apart each listing entry, keyed by URL type.
    HTML_CLASS_NAMES = {
        'channel': {
            'container': 'channel-videos-container',
            'title': 'channel-videos-title',
            'description': 'channel-videos-text',
        },
        'playlist': {
            'container': 'playlist-video',
            'title': 'title',
            'description': 'description',
        },
    }

    @staticmethod
    def _make_url(playlist_id, playlist_type):
        """Return the listing URL (with trailing slash) for the given id and type."""
        return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'

    def _fetch_page(self, playlist_id, playlist_type, page_num):
        """Yield url-transparent results for one page of the listing.

        POSTs to the listing's ``extend/`` endpoint with an offset of
        ``page_num * PAGE_SIZE``. Nothing is yielded when the JSON response
        has no truthy ``success`` value; entries without a ``/video/`` link
        are skipped.
        """
        playlist_url = self._make_url(playlist_id, playlist_type)
        post_body = urlencode_postdata({
            'csrfmiddlewaretoken': self._TOKEN,
            'name': '',
            'offset': page_num * self.PAGE_SIZE,
        })
        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Referer': playlist_url,
            'X-Requested-With': 'XMLHttpRequest',
            'Cookie': f'csrftoken={self._TOKEN}',
        }
        data = self._download_json(
            f'{playlist_url}extend/', playlist_id, f'Downloading page {page_num}',
            data=post_body, headers=request_headers)
        if not data.get('success'):
            return

        class_names = self.HTML_CLASS_NAMES[playlist_type]
        for entry_html in get_elements_html_by_class(class_names['container'], data.get('html')):
            video_id = self._search_regex(
                r'<a\s[^>]*\bhref=["\']/video/([^"\'/]+)', entry_html, 'video id', default=None)
            if video_id:
                # Metadata scraped from the listing is attached to the result.
                yield self.url_result(
                    f'https://www.bitchute.com/video/{video_id}', BitChuteIE, video_id,
                    url_transparent=True,
                    title=clean_html(get_element_by_class(class_names['title'], entry_html)),
                    description=clean_html(get_element_by_class(class_names['description'], entry_html)),
                    duration=parse_duration(get_element_by_class('video-duration', entry_html)),
                    view_count=parse_count(clean_html(get_element_by_class('video-views', entry_html))))

    def _real_extract(self, url):
        """Build a lazily paged playlist result for a channel or playlist URL."""
        playlist_type, playlist_id = self._match_valid_url(url).group('type', 'id')
        webpage = self._download_webpage(self._make_url(playlist_id, playlist_type), playlist_id)

        # Pages are requested on demand; only the page number varies per call.
        fetch_page = functools.partial(self._fetch_page, playlist_id, playlist_type)
        entries = OnDemandPagedList(fetch_page, self.PAGE_SIZE)

        title = self._html_extract_title(webpage, default=None)
        description = self._html_search_meta(
            ('description', 'og:description', 'twitter:description'), webpage, default=None)
        video_total = int_or_none(self._html_search_regex(
            r'<span>(\d+)\s+videos?</span>', webpage, 'playlist count', default=None))

        return self.playlist_result(
            entries, playlist_id, title=title, description=description,
            playlist_count=video_total)