]>
Commit | Line | Data |
---|---|---|
b65e3b06 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import itertools | |
5 | import re | |
6 | ||
7 | from .common import InfoExtractor | |
6b688b89 | 8 | from ..utils import ( |
37fb591c AH |
9 | ExtractorError, |
10 | GeoRestrictedError, | |
6b688b89 | 11 | orderedSet, |
6ddd4bf6 | 12 | unified_strdate, |
6b688b89 S |
13 | urlencode_postdata, |
14 | ) | |
b65e3b06 S |
15 | |
16 | ||
class BitChuteIE(InfoExtractor):
    # Extractor for single BitChute videos. The URL pattern accepts the
    # /video/, /embed/ and /torrent/<x>/ forms and captures the last path
    # segment as the video id.
    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
        'md5': '7e427d7ed7af5a75b5855705ec750e2b',
        'info_dict': {
            # _real_extract() reports the id captured from the URL, so the
            # expected id has to be the one embedded in 'url' above.
            'id': 'UGlrF9o9b-Q',
            'ext': 'mp4',
            'title': 'This is the first video on #BitChute !',
            'description': 'md5:a0337e7b1fe39e32336974af8173a034',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'BitChute',
            'upload_date': '20170103',
        },
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,
    }, {
        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every BitChute URL used as a <script>/<iframe> src in webpage.

        The result is a list of the matched URL strings, in document order;
        it is empty when the page embeds no BitChute player.
        """
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>%s)' % BitChuteIE._VALID_URL,
                webpage)]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by url.

        Raises GeoRestrictedError when the page reports 'Video Unavailable'
        and ExtractorError when no formats can be found for another reason.
        """
        video_id = self._match_id(url)

        # Whatever URL form matched, the canonical /video/ page is the one
        # that gets downloaded. A desktop Chrome User-Agent is sent
        # explicitly (presumably the site serves different markup otherwise
        # -- not verifiable from this file).
        webpage = self._download_webpage(
            'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
            })

        # Title fallbacks, in order: the element with id "video-title", the
        # <title> tag, the "description" meta tag, the OpenGraph description.
        title = self._html_search_regex(
            (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
            webpage, 'title', default=None) or self._html_search_meta(
            'description', webpage, 'title',
            default=None) or self._og_search_description(webpage)

        # Candidate media URLs: arguments of addWebSeed(...) calls followed
        # by any "as=<http url>" parameters found in the page.
        format_urls = []
        for mobj in re.finditer(
                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
            format_urls.append(mobj.group('url'))
        format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))

        formats = [
            {'url': format_url}
            for format_url in orderedSet(format_urls)]

        if not formats:
            # Nothing found above: fall back to HTML5 media tags.
            entries = self._parse_html5_media_entries(
                url, webpage, video_id)
            if not entries:
                # Surface the page's own heading as the error message.
                error = self._html_search_regex(r'<h1 class="page-title">([^<]+)</h1>', webpage, 'error', default='Cannot find video')
                if error == 'Video Unavailable':
                    raise GeoRestrictedError(error)
                raise ExtractorError(error)
            formats = entries[0]['formats']

        self._check_formats(formats, video_id)
        self._sort_formats(formats)

        # The remaining metadata lookups are non-fatal.
        description = self._html_search_regex(
            r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
            webpage, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail')
        uploader = self._html_search_regex(
            (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
             r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
            webpage, 'uploader', fatal=False)

        # The regex captures the text after "at HH:MM UTC on " up to the next
        # period; unified_strdate() converts that text to the date string.
        upload_date = unified_strdate(self._search_regex(
            r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
            webpage, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
            'formats': formats,
        }
108 | ||
109 | ||
class BitChuteChannelIE(InfoExtractor):
    # Extractor for BitChute channel pages; the captured id is the channel
    # path segment and is reused as the playlist id.
    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.bitchute.com/channel/victoriaxrave/',
        'playlist_mincount': 185,
        'info_dict': {
            'id': 'victoriaxrave',
        },
    }

    # Fixed value sent both as the csrfmiddlewaretoken form field and as the
    # csrftoken cookie on every paging request.
    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'

    def _entries(self, channel_id):
        """Yield a url_result for each video of the channel, page by page.

        Each request POSTs the number of videos already seen as 'offset' to
        the channel's extend/ endpoint. Iteration stops as soon as a response
        has 'success' set to False, carries no 'html', or its HTML contains
        no video links.
        """
        base_url = 'https://www.bitchute.com/channel/%s/' % channel_id
        seen = 0
        page = 0
        while True:
            page += 1
            form = urlencode_postdata({
                'csrfmiddlewaretoken': self._TOKEN,
                'name': '',
                'offset': seen,
            })
            response = self._download_json(
                '%sextend/' % base_url, channel_id,
                'Downloading channel page %d' % page,
                data=form, headers={
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                    'Referer': base_url,
                    'X-Requested-With': 'XMLHttpRequest',
                    'Cookie': 'csrftoken=%s' % self._TOKEN,
                })
            # Only an explicit False ends the listing; a missing 'success'
            # key does not.
            if response.get('success') is False:
                return
            fragment = response.get('html')
            if not fragment:
                return
            found_ids = re.findall(
                r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
                fragment)
            if not found_ids:
                return
            # The next request resumes after everything found so far.
            seen += len(found_ids)
            for found_id in found_ids:
                yield self.url_result(
                    'https://www.bitchute.com/video/%s' % found_id,
                    ie=BitChuteIE.ie_key(), video_id=found_id)

    def _real_extract(self, url):
        """Return a playlist of the channel's videos, keyed by channel id."""
        channel_id = self._match_id(url)
        videos = self._entries(channel_id)
        return self.playlist_result(videos, playlist_id=channel_id)