]>
Commit | Line | Data |
---|---|---|
1461d7be | 1 | import functools |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
b47d236d | 6 | clean_html, |
7 | get_element_by_class, | |
1461d7be | 8 | parse_count, |
b47d236d | 9 | remove_end, |
1461d7be | 10 | unified_strdate, |
11 | js_to_json, | |
12 | OnDemandPagedList, | |
13 | ) | |
14 | ||
15 | ||
class TokentubeIE(InfoExtractor):
    """Extractor for single tokentube.net video pages.

    Handles the ``/v/<id>``, ``/l/<id>`` and ``/view?v=<id>`` / ``/view?l=<id>``
    URL shapes matched by ``_VALID_URL``.
    """

    _VALID_URL = r'https?://(?:www\.)?tokentube\.net/(?:view\?[vl]=|[vl]/)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://tokentube.net/l/3236632011/Praise-A-Thon-Pastori-Chrisin-ja-Pastori-Bennyn-kanssa-27-8-2021',
        'info_dict': {
            'id': '3236632011',
            'ext': 'mp4',
            'title': 'Praise-A-Thon Pastori Chrisin ja Pastori Bennyn kanssa 27.8.2021',
            'description': '',
            'uploader': 'Pastori Chris - Rapsodia.fi',
            'upload_date': '20210827',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://tokentube.net/v/3950239124/Linux-Ubuntu-Studio-perus-k%C3%A4ytt%C3%B6',
        'md5': '0e1f00421f501f5eada9890d38fcfb56',
        'info_dict': {
            'id': '3950239124',
            'ext': 'mp4',
            'title': 'Linux Ubuntu Studio perus käyttö',
            'description': 'md5:46077d0daaba1974f2dc381257f9d64c',
            'uploader': 'jyrilehtonen',
            'upload_date': '20210825',
        },
    }, {
        'url': 'https://tokentube.net/view?v=3582463289',
        'info_dict': {
            'id': '3582463289',
            'ext': 'mp4',
            'title': 'Police for Freedom - toiminta aloitetaan Suomessa ❤️??',
            'description': 'md5:37ebf1cb44264e0bf23ed98b337ee63e',
            'uploader': 'Voitontie',
            'upload_date': '20210428',
        }
    }]

    def _real_extract(self, url):
        """Download the video page at *url* and build its info dict.

        The media sources are read from the embedded ``html5`` player config
        when it is present and parseable, otherwise from the argument of the
        page's ``updateSrc(...)`` call. Title and sources are mandatory; the
        counters, upload date, uploader and description are best-effort.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``view_count``, ``like_count``, ``dislike_count``, ``upload_date``,
        ``description`` and ``uploader``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h1\s*class=["\']title-text["\']>(.+?)</h1>', webpage, 'title')

        # The player config is only the preferred source, so its absence must
        # not abort extraction: a fatal search here would make the
        # `updateSrc(...)` fallback below unreachable.
        player_config = self._html_search_regex(
            r'({["\']html5["\'].+?}}}+)', webpage, 'data json', default=None)
        data_json = {}
        if player_config:
            # The parse is non-fatal, so it may not yield a dict; fall back to
            # an empty one so the `.get()` below cannot fail on a non-dict.
            data_json = self._parse_json(js_to_json(player_config), video_id, fatal=False) or {}

        sources = data_json.get('sources') or self._parse_json(
            self._html_search_regex(r'updateSrc\(([^\)]+)\)', webpage, 'sources'),
            video_id, transform_source=js_to_json)

        # Entries without a `src` carry no usable URL, so leave them out
        # rather than emitting a format whose url is None.
        formats = [{
            'url': source['src'],
            'format_id': source.get('label'),
            'height': source.get('res'),
        } for source in sources if source.get('src')]

        view_count = parse_count(self._html_search_regex(
            r'<p\s*class=["\']views_counter["\']>\s*([\d\.,]+)\s*<span>views?</span></p>',
            webpage, 'view_count', fatal=False))

        like_count = parse_count(self._html_search_regex(
            r'<div\s*class="sh_button\s*likes_count">\s*(\d+)\s*</div>',
            webpage, 'like count', fatal=False))

        dislike_count = parse_count(self._html_search_regex(
            r'<div\s*class="sh_button\s*dislikes_count">\s*(\d+)\s*</div>',
            webpage, 'dislike count', fatal=False))

        upload_date = unified_strdate(self._html_search_regex(
            r'<span\s*class="p-date">Published\s*on\s+([^<]+)',
            webpage, 'upload date', fatal=False))

        uploader = self._html_search_regex(
            r'<a\s*class="place-left"[^>]+>(.+?)</a>',
            webpage, 'uploader', fatal=False)

        # Prefer the on-page description element; use the meta tags only when
        # that element is missing or empty.
        description = (clean_html(get_element_by_class('p-d-txt', webpage))
                       or self._html_search_meta(('og:description', 'description', 'twitter:description'), webpage))

        # Drop a trailing 'Category' from the text (presumably a label that
        # follows the description inside the same element - confirm against a
        # live page).
        description = remove_end(description, 'Category')

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'upload_date': upload_date,
            'description': description,
            'uploader': uploader,
        }
109 | ||
110 | ||
class TokentubeChannelIE(InfoExtractor):
    """Extractor for tokentube.net channel pages, returned as a lazily paged playlist."""

    # Page size handed to OnDemandPagedList in _real_extract.
    _PAGE_SIZE = 20
    IE_NAME = 'Tokentube:channel'
    _VALID_URL = r'https?://(?:www\.)?tokentube\.net/channel/(?P<id>\d+)/[^/]+(?:/videos)?'
    _TESTS = [{
        'url': 'https://tokentube.net/channel/3697658904/TokenTube',
        'info_dict': {
            'id': '3697658904',
        },
        'playlist_mincount': 7,
    }, {
        'url': 'https://tokentube.net/channel/3353234420/Linux/videos',
        'info_dict': {
            'id': '3353234420',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'https://tokentube.net/channel/3475834195/Voitontie',
        'info_dict': {
            'id': '3475834195',
        },
        'playlist_mincount': 150,
    }]

    def _fetch_page(self, channel_id, page):
        """Yield one ``url_result`` per video link found on a channel listing page.

        *page* is the index supplied by the paged list; the request is made
        for ``page + 1`` (presumably the index is zero-based while the site
        counts pages from 1 - confirm). Nothing is yielded when the download
        fails or when the response contains the site's "no results" marker.
        """
        page += 1
        videos_info = self._download_webpage(
            f'https://tokentube.net/videos?p=0&m=1&sort=recent&u={channel_id}&page={page}',
            channel_id, headers={'X-Requested-With': 'XMLHttpRequest'},
            note=f'Downloading page {page}', fatal=False)
        # The download is non-fatal, so a failure may hand back a falsy
        # non-string value; running the `in` test below on that would raise
        # TypeError instead of simply ending this page.
        if not videos_info:
            return
        if '</i> Sorry, no results were found.' in videos_info:
            return
        for path, media_id in re.findall(
                r'<a[^>]+\bhref=["\']([^"\']+/[lv]/(\d+)/\S+)["\'][^>]+>',
                videos_info):
            yield self.url_result(path, ie=TokentubeIE.ie_key(), video_id=media_id)

    def _real_extract(self, url):
        """Return a playlist for the channel in *url* whose pages are fetched on demand."""
        channel_id = self._match_id(url)

        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, channel_id), self._PAGE_SIZE)

        return self.playlist_result(entries, channel_id)