]>
Commit | Line | Data |
---|---|---|
b65e3b06 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import itertools | |
5 | import re | |
6 | ||
7 | from .common import InfoExtractor | |
6b688b89 S |
8 | from ..utils import ( |
9 | orderedSet, | |
6ddd4bf6 | 10 | unified_strdate, |
6b688b89 S |
11 | urlencode_postdata, |
12 | ) | |
b65e3b06 S |
13 | |
14 | ||
class BitChuteIE(InfoExtractor):
    """Extractor for a single BitChute video.

    Matches ``/video/<id>``, ``/embed/<id>`` and ``/torrent/<x>/<id>`` URLs
    and always fetches the canonical ``/video/<id>`` page to scrape metadata
    and media URLs from it.
    """

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
        'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
        'info_dict': {
            'id': 'szoMrox2JEI',
            'ext': 'mp4',
            'title': 'Fuck bitches get money',
            'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Victoria X Rave',
            'upload_date': '20170813',
        },
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,
    }, {
        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page for *url* and return its info dict.

        The returned dict carries ``id``, ``title``, ``description``,
        ``thumbnail``, ``uploader``, ``upload_date`` and ``formats``.
        """
        video_id = self._match_id(url)

        # Whatever URL flavour matched, scrape the regular video page.
        # A desktop browser User-Agent is sent explicitly with the request.
        webpage = self._download_webpage(
            'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
            })

        # Title: element with id "video-title", then <title>, then the
        # "description" meta tag, and finally the OpenGraph description.
        title = self._html_search_regex(
            (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
            webpage, 'title', default=None) or self._html_search_meta(
            'description', webpage, 'title',
            default=None) or self._og_search_description(webpage)

        # Candidate media URLs: addWebSeed('<url>') calls first, then any
        # "as=<url>" parameters found in the page.
        format_urls = [
            mobj.group('url')
            for mobj in re.finditer(
                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
        format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))

        # orderedSet drops duplicate URLs while keeping first-seen order.
        formats = [
            {'url': format_url}
            for format_url in orderedSet(format_urls)]

        if not formats:
            # Fall back to HTML5 media elements.  The page may contain none,
            # so do not index the result blindly: leaving `formats` empty
            # avoids a bare IndexError here and lets the format helpers
            # below deal with the missing formats.
            # NOTE(review): presumably _sort_formats raises ExtractorError
            # for an empty list -- confirm against InfoExtractor.
            entries = self._parse_html5_media_entries(url, webpage, video_id)
            if entries:
                formats = entries[0]['formats']

        self._check_formats(formats, video_id)
        self._sort_formats(formats)

        description = self._html_search_regex(
            r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
            webpage, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail')
        uploader = self._html_search_regex(
            (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
             r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
            webpage, 'uploader', fatal=False)

        # The publish-date element reads "... at HH:MM UTC on <date>."; only
        # the <date> part is captured and normalised.
        upload_date = unified_strdate(self._search_regex(
            r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
            webpage, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
            'formats': formats,
        }
92 | ||
93 | ||
class BitChuteChannelIE(InfoExtractor):
    """Playlist extractor that lists the videos of a BitChute channel."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.bitchute.com/channel/victoriaxrave/',
        'playlist_mincount': 185,
        'info_dict': {
            'id': 'victoriaxrave',
        },
    }

    # Sent both as the "csrfmiddlewaretoken" form field and as the
    # "csrftoken" cookie of every paging request.
    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'

    def _entries(self, channel_id):
        """Yield a url_result for each video of *channel_id*, page by page.

        Pages are requested by POSTing to the channel's ``extend/`` endpoint
        with a running offset.  Iteration stops once a response reports
        ``success`` as False, carries no HTML, or contains no video links.
        """
        channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
        extend_url = '%sextend/' % channel_url
        fetched = 0  # number of videos seen so far, used as the next offset
        for page_num in itertools.count(1):
            form = urlencode_postdata({
                'csrfmiddlewaretoken': self._TOKEN,
                'name': '',
                'offset': fetched,
            })
            request_headers = {
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': channel_url,
                'X-Requested-With': 'XMLHttpRequest',
                'Cookie': 'csrftoken=%s' % self._TOKEN,
            }
            response = self._download_json(
                extend_url, channel_id,
                'Downloading channel page %d' % page_num,
                data=form, headers=request_headers)

            page_html = response.get('html')
            if response.get('success') is False or not page_html:
                return

            found_ids = re.findall(
                r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
                page_html)
            if not found_ids:
                return

            fetched += len(found_ids)
            for found_id in found_ids:
                video_url = 'https://www.bitchute.com/video/%s' % found_id
                yield self.url_result(
                    video_url, ie=BitChuteIE.ie_key(), video_id=found_id)

    def _real_extract(self, url):
        """Return a playlist result, keyed by channel id, for *url*."""
        channel_id = self._match_id(url)
        channel_entries = self._entries(channel_id)
        return self.playlist_result(channel_entries, playlist_id=channel_id)