]>
Commit | Line | Data |
---|---|---|
b65e3b06 S |
1 | import itertools |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
6b688b89 | 5 | from ..utils import ( |
37fb591c AH |
6 | ExtractorError, |
7 | GeoRestrictedError, | |
6b688b89 | 8 | orderedSet, |
6ddd4bf6 | 9 | unified_strdate, |
6b688b89 S |
10 | urlencode_postdata, |
11 | ) | |
b65e3b06 S |
12 | |
13 | ||
class BitChuteIE(InfoExtractor):
    """Extractor for single BitChute videos.

    Matches ``/video/<id>``, ``/embed/<id>`` and ``/torrent/<x>/<id>`` URLs;
    whatever form was given, the canonical ``/video/<id>`` page is downloaded
    and scraped.
    """

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
        'md5': '7e427d7ed7af5a75b5855705ec750e2b',
        'info_dict': {
            # _real_extract() reports the id captured from the URL, so the
            # expected id has to be the one embedded in 'url' above.
            'id': 'UGlrF9o9b-Q',
            'ext': 'mp4',
            'title': 'This is the first video on #BitChute !',
            'description': 'md5:a0337e7b1fe39e32336974af8173a034',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'BitChute',
            'upload_date': '20170103',
        },
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,
    }, {
        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every BitChute URL used as ``src`` of a <script>/<iframe> tag.

        :param webpage: HTML text to scan.
        :returns: list of matched URLs (strings), in document order.
        """
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>%s)' % BitChuteIE._VALID_URL,
                webpage)]

    def _real_extract(self, url):
        """Scrape the video page and build the info dict.

        :param url: any URL accepted by ``_VALID_URL``.
        :returns: dict with ``id``, ``title``, ``description``, ``thumbnail``,
            ``uploader``, ``upload_date`` and ``formats``.
        :raises GeoRestrictedError: no media was found and the page title is
            exactly ``Video Unavailable``.
        :raises ExtractorError: no media was found for any other reason.
        """
        video_id = self._match_id(url)

        # Always fetch the /video/ page, regardless of which URL form matched.
        # NOTE(review): the explicit browser User-Agent is presumably needed
        # because the site treats the default one differently - confirm.
        webpage = self._download_webpage(
            'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
            })

        # Title sources, in order: the #video-title element or <title>, then
        # the "description" meta tag, then the OpenGraph description.
        title = self._html_search_regex(
            (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
            webpage, 'title', default=None) or self._html_search_meta(
            'description', webpage, 'title',
            default=None) or self._og_search_description(webpage)

        # Media URLs come from addWebSeed('...') calls and from "as=" query
        # parameters embedded in the page.
        format_urls = [
            mobj.group('url')
            for mobj in re.finditer(
                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
        format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))

        # orderedSet drops duplicates while keeping first-seen order.
        formats = [
            {'url': format_url}
            for format_url in orderedSet(format_urls)]

        if not formats:
            # Fall back to HTML5 <video>/<audio> markup.
            entries = self._parse_html5_media_entries(
                url, webpage, video_id)
            if not entries:
                error = self._html_search_regex(r'<h1 class="page-title">([^<]+)</h1>', webpage, 'error', default='Cannot find video')
                if error == 'Video Unavailable':
                    raise GeoRestrictedError(error)
                raise ExtractorError(error)
            formats = entries[0]['formats']

        self._check_formats(formats, video_id)
        self._sort_formats(formats)

        description = self._html_search_regex(
            r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
            webpage, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail')
        uploader = self._html_search_regex(
            (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
             r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
            webpage, 'uploader', fatal=False)

        # The publish line is expected to read "... at HH:MM UTC on <date>.";
        # only the date part is captured and normalised.
        upload_date = unified_strdate(self._search_regex(
            r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
            webpage, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
            'formats': formats,
        }
105 | ||
106 | ||
class BitChuteChannelIE(InfoExtractor):
    """Playlist extractor for BitChute channel pages.

    Videos are listed by POSTing repeatedly to the channel's ``extend/``
    endpoint with a growing offset until a page yields nothing.
    """

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.bitchute.com/channel/victoriaxrave/',
        'playlist_mincount': 185,
        'info_dict': {
            'id': 'victoriaxrave',
        },
    }

    # Sent both as the csrfmiddlewaretoken form field and as the csrftoken
    # cookie of every listing request.
    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'

    def _entries(self, channel_id):
        """Yield a url_result for each video of the channel, page by page.

        Stops when the response has ``success`` set to False, carries no
        ``html``, or that html contains no video links.
        """
        base_url = 'https://www.bitchute.com/channel/%s/' % channel_id
        fetched = 0
        for page_index in itertools.count(1):
            form_fields = {
                'csrfmiddlewaretoken': self._TOKEN,
                'name': '',
                'offset': fetched,
            }
            request_headers = {
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': base_url,
                'X-Requested-With': 'XMLHttpRequest',
                'Cookie': 'csrftoken=%s' % self._TOKEN,
            }
            response = self._download_json(
                '%sextend/' % base_url, channel_id,
                'Downloading channel page %d' % page_index,
                data=urlencode_postdata(form_fields),
                headers=request_headers)

            if response.get('success') is False:
                return
            markup = response.get('html')
            if not markup:
                return

            found_ids = re.findall(
                r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
                markup)
            if not found_ids:
                return

            # The next request starts after everything seen so far.
            fetched += len(found_ids)
            for found_id in found_ids:
                yield self.url_result(
                    'https://www.bitchute.com/video/%s' % found_id,
                    ie=BitChuteIE.ie_key(), video_id=found_id)

    def _real_extract(self, url):
        """Return a playlist, keyed by the channel id, of the channel's videos."""
        channel_id = self._match_id(url)
        entries = self._entries(channel_id)
        return self.playlist_result(entries, playlist_id=channel_id)