]>
Commit | Line | Data |
---|---|---|
3f90813f RD |
1 | import re |
2 | ||
3 | from .archiveorg import ArchiveOrgIE | |
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | InAdvancePagedList, | |
615a8444 | 7 | clean_html, |
3f90813f RD |
8 | int_or_none, |
9 | orderedSet, | |
10 | str_to_int, | |
11 | urljoin, | |
12 | ) | |
13 | ||
14 | ||
class AltCensoredIE(InfoExtractor):
    """Extractor for single altcensored.com videos (``watch?v=`` and ``embed/`` URLs).

    The extractor does not resolve media itself: it returns a ``url_transparent``
    result that points at the ``youtube-<id>`` item on archive.org and names
    ArchiveOrgIE as the extractor for it.  The view count and category scraped
    from the altcensored page are supplied alongside that result.
    """
    IE_NAME = 'altcensored'
    _VALID_URL = r'https?://(?:www\.)?altcensored\.com/(?:watch\?v=|embed/)(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.altcensored.com/watch?v=k0srjLSkga8',
        'info_dict': {
            'id': 'youtube-k0srjLSkga8',
            'ext': 'webm',
            'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
            'display_id': 'k0srjLSkga8.webm',
            'release_date': '20180403',
            'creators': ['Virginie Vota'],
            'release_year': 2018,
            'upload_date': '20230318',
            'uploader': 'admin@altcensored.com',
            'description': 'md5:0b38a8fc04103579d5c1db10a247dc30',
            'timestamp': 1679161343,
            'track': 'k0srjLSkga8',
            'duration': 926.09,
            'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
            'view_count': int,
            'categories': ['News & Politics'],
        },
    }]

    def _real_extract(self, url):
        """Build the ``url_transparent`` result for the video addressed by *url*.

        Returns a dict with the archive.org details URL for ``youtube-<id>``,
        the ArchiveOrgIE key, and two optional fields taken from the
        altcensored page: ``view_count`` and ``categories`` (both ``None``
        when the page does not contain them).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # default=None keeps a missing category link from being an error.
        category = clean_html(self._html_search_regex(
            r'<a href="/category/\d+">([^<]+)</a>', webpage, 'category', default=None))

        return {
            '_type': 'url_transparent',
            'url': f'https://archive.org/details/youtube-{video_id}',
            'ie_key': ArchiveOrgIE.ie_key(),
            # The pattern runs on the raw markup, so a non-breaking space may
            # show up as the literal entity ``&nbsp;``; ``\s`` already covers
            # real whitespace, including U+00A0.
            'view_count': str_to_int(self._html_search_regex(
                r'YouTube Views:(?:\s|&nbsp;)*([\d,]+)', webpage, 'view count', default=None)),
            'categories': [category] if category else None,
        }
54 | ||
55 | ||
class AltCensoredChannelIE(InfoExtractor):
    """Playlist extractor for altcensored.com channel pages.

    The channel landing page provides the title and the number of listing
    pages; every listing page is then scanned for ``/watch?v=`` links, each of
    which becomes a playlist entry handled by AltCensoredIE.
    """
    IE_NAME = 'altcensored:channel'
    _VALID_URL = r'https?://(?:www\.)?altcensored\.com/channel/(?!page|table)(?P<id>[^/?#]+)'
    _PAGE_SIZE = 24
    _TESTS = [{
        'url': 'https://www.altcensored.com/channel/UCFPTO55xxHqFqkzRZHu4kcw',
        'info_dict': {
            'title': 'Virginie Vota',
            'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
        },
        'playlist_count': 85,
    }, {
        'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
        'info_dict': {
            'title': 'yukikaze775',
            'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
        },
        'playlist_count': 4,
    }, {
        'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
        'info_dict': {
            'title': 'Mister Metokur',
            'id': 'UCfYbb7nga6-icsFWWgS-kWw',
        },
        'playlist_count': 121,
    }]

    def _real_extract(self, url):
        """Return a paged playlist result covering every video of the channel at *url*."""
        channel_id = self._match_id(url)
        landing_page = self._download_webpage(
            url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
        channel_title = self._html_search_meta(
            'altcen_title', landing_page, 'title', fatal=False)

        # A pagination link whose text repeats its own page number gives the
        # page count; without such a link the channel is treated as one page.
        raw_page_count = self._html_search_regex(
            r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
            landing_page, 'page count', default='1')
        total_pages = int_or_none(raw_page_count)

        def fetch_page(page_index):
            # The incoming index is shifted by one before it is used in the URL.
            page_number = page_index + 1
            listing = self._download_webpage(
                f'https://altcensored.com/channel/{channel_id}/page/{page_number}',
                channel_id, note=f'Downloading page {page_number}')

            watch_paths = orderedSet(re.findall(r'<a[^>]+href="(/watch\?v=[^"]+)', listing))
            entries = []
            for watch_path in watch_paths:
                video_url = urljoin('https://www.altcensored.com', watch_path)
                entries.append(self.url_result(video_url, AltCensoredIE))
            return entries

        paged_entries = InAdvancePagedList(fetch_page, total_pages, self._PAGE_SIZE)
        return self.playlist_result(
            paged_entries, playlist_id=channel_id, playlist_title=channel_title)