]>
Commit | Line | Data |
---|---|---|
b65e3b06 S |
1 | import itertools |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
6b688b89 | 5 | from ..utils import ( |
37fb591c AH |
6 | ExtractorError, |
7 | GeoRestrictedError, | |
6b688b89 | 8 | orderedSet, |
6ddd4bf6 | 9 | unified_strdate, |
6b688b89 S |
10 | urlencode_postdata, |
11 | ) | |
b65e3b06 S |
12 | |
13 | ||
class BitChuteIE(InfoExtractor):
    """Extractor for a single BitChute video.

    Accepts ``/video/<id>``, ``/embed/<id>`` and ``/torrent/<x>/<id>`` URLs;
    whichever form matched, the ``/video/<id>`` page is the one downloaded
    and scraped.
    """

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
        'md5': '7e427d7ed7af5a75b5855705ec750e2b',
        'info_dict': {
            # _real_extract returns the id captured from the URL unchanged,
            # so the expected id has to be the one embedded in 'url' above.
            'id': 'UGlrF9o9b-Q',
            'ext': 'mp4',
            'title': 'This is the first video on #BitChute !',
            'description': 'md5:a0337e7b1fe39e32336974af8173a034',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'BitChute',
            'upload_date': '20170103',
        },
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,
    }, {
        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the video page for *url* and return its info dict.

        The returned dict carries ``id``, ``title``, ``description``,
        ``thumbnail``, ``uploader``, ``upload_date`` and ``formats``.

        Raises:
            GeoRestrictedError: no media was found and the page heading
                reads 'Video Unavailable'.
            ExtractorError: no media was found for any other reason.
        """
        video_id = self._match_id(url)

        # Always request the /video/ page, with a desktop browser User-Agent.
        webpage = self._download_webpage(
            f'https://www.bitchute.com/video/{video_id}', video_id, headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
            })

        # Title sources, in order of preference: the video-title element or
        # <title>, then the "description" meta tag, then og:description.
        title = self._html_search_regex(
            (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
            webpage, 'title', default=None) or self._html_search_meta(
            'description', webpage, 'title',
            default=None) or self._og_search_description(webpage)

        # Candidate media URLs: arguments of addWebSeed(...) calls, followed
        # by any "as=<http(s) url>" values found in the page.
        format_urls = [
            mobj.group('url') for mobj in re.finditer(
                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
        format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))

        # orderedSet drops duplicate URLs before the format dicts are built.
        formats = [
            {'url': format_url}
            for format_url in orderedSet(format_urls)]

        if not formats:
            # Nothing found via the patterns above: fall back to HTML5 media
            # elements, and if that fails too, report the page's own error.
            entries = self._parse_html5_media_entries(
                url, webpage, video_id)
            if not entries:
                error = self._html_search_regex(
                    r'<h1 class="page-title">([^<]+)</h1>', webpage, 'error', default='Cannot find video')
                if error == 'Video Unavailable':
                    raise GeoRestrictedError(error)
                raise ExtractorError(error, expected=True)
            formats = entries[0]['formats']

        self._check_formats(formats, video_id)
        if not formats:
            # raise_no_formats is expected to raise on its own. If it ever
            # returns normally (presumably when "no formats" errors are being
            # ignored - confirm against InfoExtractor), wrapping it in `raise`
            # would turn that into "TypeError: exceptions must derive from
            # BaseException", so it is called as a plain statement.
            self.raise_no_formats('Video is unavailable', expected=True, video_id=video_id)
        self._sort_formats(formats)

        # The remaining fields are optional; a failed match is non-fatal.
        description = self._html_search_regex(
            r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
            webpage, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail')
        uploader = self._html_search_regex(
            (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
             r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
            webpage, 'uploader', fatal=False)

        # Captures the text after "... at HH:MM UTC on " up to the next
        # period and hands it to unified_strdate.
        upload_date = unified_strdate(self._search_regex(
            r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
            webpage, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
            'formats': formats,
        }
100 | ||
101 | ||
class BitChuteChannelIE(InfoExtractor):
    """Extractor that turns a BitChute channel URL into a playlist of its videos."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.bitchute.com/channel/victoriaxrave/',
        'playlist_mincount': 185,
        'info_dict': {
            'id': 'victoriaxrave',
        },
    }

    # Fixed token, sent both as the 'csrfmiddlewaretoken' form field and as
    # the 'csrftoken' cookie of every listing request.
    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'

    def _entries(self, channel_id):
        """Yield one url_result per video on the channel's paginated listing.

        Pages are requested from the channel's ``extend/`` endpoint with an
        ``offset`` equal to the number of video ids collected so far.
        Iteration ends as soon as a response reports ``success: False``,
        has no ``html``, or its HTML contains no video links.
        """
        base_url = 'https://www.bitchute.com/channel/%s/' % channel_id
        collected = 0
        page_no = 0
        while True:
            page_no += 1
            form = urlencode_postdata({
                'csrfmiddlewaretoken': self._TOKEN,
                'name': '',
                'offset': collected,
            })
            request_headers = {
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': base_url,
                'X-Requested-With': 'XMLHttpRequest',
                'Cookie': 'csrftoken=%s' % self._TOKEN,
            }
            response = self._download_json(
                '%sextend/' % base_url, channel_id,
                'Downloading channel page %d' % page_no,
                data=form, headers=request_headers)

            # An explicit failure flag or an empty HTML fragment ends paging.
            if response.get('success') is False:
                return
            fragment = response.get('html')
            if not fragment:
                return

            found = re.findall(
                r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
                fragment)
            if not found:
                return

            # The next request starts after everything seen so far.
            collected += len(found)
            yield from (
                self.url_result(
                    'https://www.bitchute.com/video/%s' % vid,
                    ie=BitChuteIE.ie_key(), video_id=vid)
                for vid in found)

    def _real_extract(self, url):
        """Return a playlist result whose id is the channel id taken from *url*."""
        playlist_id = self._match_id(url)
        videos = self._entries(playlist_id)
        return self.playlist_result(videos, playlist_id=playlist_id)