]>
Commit | Line | Data |
---|---|---|
1 | import functools | |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..networking.exceptions import HTTPError | |
6 | from ..utils import ( | |
7 | ExtractorError, | |
8 | OnDemandPagedList, | |
9 | clean_html, | |
10 | extract_attributes, | |
11 | get_element_by_id, | |
12 | int_or_none, | |
13 | parse_count, | |
14 | parse_duration, | |
15 | unified_timestamp, | |
16 | url_or_none, | |
17 | urlencode_postdata, | |
18 | urljoin, | |
19 | ) | |
20 | from ..utils.traversal import traverse_obj | |
21 | ||
22 | ||
class NewgroundsIE(InfoExtractor):
    """Extractor for single Newgrounds ``audio/listen`` and ``portal/view`` pages."""

    _NETRC_MACHINE = 'newgrounds'
    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>\d+)(?:/format/flash)?'
    _TESTS = [{
        'url': 'https://www.newgrounds.com/audio/listen/549479',
        'md5': 'fe6033d297591288fa1c1f780386f07a',
        'info_dict': {
            'id': '549479',
            'ext': 'mp3',
            'title': 'B7 - BusMode',
            'uploader': 'Burn7',
            'timestamp': 1378892945,
            'upload_date': '20130911',
            'duration': 143,
            'view_count': int,
            'description': 'md5:b8b3c2958875189f07d8e313462e8c4f',
            'age_limit': 0,
            'thumbnail': r're:^https://aicon\.ngfiles\.com/549/549479\.png',
        },
    }, {
        'url': 'https://www.newgrounds.com/portal/view/1',
        'md5': 'fbfb40e2dc765a7e830cb251d370d981',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': 'Scrotum 1',
            'uploader': 'Brian-Beaton',
            'timestamp': 955078533,
            'upload_date': '20000407',
            'view_count': int,
            'description': 'Scrotum plays "catch."',
            'age_limit': 17,
            'thumbnail': r're:^https://picon\.ngfiles\.com/0/flash_1_card\.png',
        },
    }, {
        # source format unavailable, additional mp4 formats
        'url': 'http://www.newgrounds.com/portal/view/689400',
        'info_dict': {
            'id': '689400',
            'ext': 'mp4',
            'title': 'ZTV News Episode 8',
            'uploader': 'ZONE-SAMA',
            'timestamp': 1487983183,
            'upload_date': '20170225',
            'view_count': int,
            'description': 'md5:aff9b330ec2e78ed93b1ad6d017accc6',
            'age_limit': 17,
            'thumbnail': r're:^https://picon\.ngfiles\.com/689000/flash_689400_card\.png',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.newgrounds.com/portal/view/297383',
        'md5': '2c11f5fd8cb6b433a63c89ba3141436c',
        'info_dict': {
            'id': '297383',
            'ext': 'mp4',
            'title': 'Metal Gear Awesome',
            'uploader': 'Egoraptor',
            'timestamp': 1140681292,
            'upload_date': '20060223',
            'view_count': int,
            'description': 'md5:9246c181614e23754571995104da92e0',
            'age_limit': 13,
            'thumbnail': r're:^https://picon\.ngfiles\.com/297000/flash_297383_card\.png',
        },
    }, {
        'url': 'https://www.newgrounds.com/portal/view/297383/format/flash',
        'md5': '5d05585a9a0caca059f5abfbd3865524',
        'info_dict': {
            'id': '297383',
            'ext': 'swf',
            'title': 'Metal Gear Awesome',
            'description': 'Metal Gear Awesome',
            'uploader': 'Egoraptor',
            'upload_date': '20060223',
            'timestamp': 1140681292,
            'view_count': int,
            'age_limit': 13,
            'thumbnail': r're:^https://picon\.ngfiles\.com/297000/flash_297383_card\.png',
        },
    }, {
        'url': 'https://www.newgrounds.com/portal/view/823109',
        'info_dict': {
            'id': '823109',
            'ext': 'mp4',
            'title': 'Rouge Futa Fleshlight Fuck',
            'description': 'I made a fleshlight model and I wanted to use it in an animation. Based on a video by CDNaturally.',
            'uploader': 'DefaultUser12',
            'upload_date': '20211122',
            'timestamp': 1637611540,
            'view_count': int,
            'age_limit': 18,
            'thumbnail': r're:^https://picon\.ngfiles\.com/823000/flash_823109_card\.png',
        },
    }]
    # Keyed by the letter captured from the page's `rated-<letter>` heading class
    # (see the 'age_limit' field in _real_extract); values are the reported age limits.
    _AGE_LIMIT = {
        'e': 0,
        't': 13,
        'm': 17,
        'a': 18,
    }
    _LOGIN_URL = 'https://www.newgrounds.com/passport'

    def _perform_login(self, username, password):
        """Log in by posting the credentials to the form found on the passport page.

        The login page is downloaded first so that its hidden form inputs can be
        sent back together with the username and password.

        Raises:
            ExtractorError: if the login form's action cannot be located, or if
                the JSON response contains an ``errors`` list (reported as an
                expected error).
        """
        login_webpage = self._download_webpage(self._LOGIN_URL, None, 'Downloading login page')
        login_path = self._search_regex(
            r'<form action="([^"]+)"', login_webpage, 'login endpoint', default=None)
        login_url = urljoin(self._LOGIN_URL, login_path)
        if not login_url:
            # Without this guard a missing form would hand a non-URL to the
            # downloader and fail with an unhelpful low-level error.
            raise ExtractorError('Unable to find login endpoint')

        result = self._download_json(login_url, None, 'Logging in', headers={
            'Accept': 'application/json',
            'Referer': self._LOGIN_URL,
            'X-Requested-With': 'XMLHttpRequest',
        }, data=urlencode_postdata({
            **self._hidden_inputs(login_webpage),
            'username': username,
            'password': password,
        }))
        if errors := traverse_obj(result, ('errors', ..., {str})):
            raise ExtractorError(', '.join(errors) or 'Unknown Error', expected=True)

    def _real_extract(self, url):
        """Build the info dict for a single media page.

        Formats come from the ``embedController`` call embedded in the page when
        present; otherwise they are read from the ``/portal/video/<id>`` JSON
        endpoint. All remaining metadata is scraped from the page HTML.

        An HTTP 401 while downloading the page is reported as "login required".
        """
        media_id = self._match_id(url)
        try:
            webpage = self._download_webpage(url, media_id)
        except ExtractorError as error:
            if isinstance(error.cause, HTTPError) and error.cause.status == 401:
                self.raise_login_required()
            raise

        media_url_string = self._search_regex(
            r'embedController\(\[{"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None)
        if media_url_string:
            uploader = None
            formats = [{
                # The capture still includes the surrounding quotes; parsing it
                # as JSON yields the decoded URL string.
                'url': self._parse_json(media_url_string, media_id),
                'format_id': 'source',
                'quality': 1,
            }]
        else:
            json_video = self._download_json(
                f'https://www.newgrounds.com/portal/video/{media_id}', media_id, headers={
                    'Accept': 'application/json',
                    'Referer': url,
                    'X-Requested-With': 'XMLHttpRequest',
                })

            formats = []
            uploader = traverse_obj(json_video, ('author', {str}))
            for format_id, sources in traverse_obj(json_video, ('sources', {dict.items}, ...)):
                # Quality is the format id minus its last character, as an int
                # (presumably ids look like "720p" -- not verifiable from this file).
                quality = int_or_none(format_id[:-1])
                formats.extend({
                    'format_id': format_id,
                    'quality': quality,
                    'url': source_url,
                } for source_url in traverse_obj(sources, (..., 'src', {url_or_none})))

        if not uploader:
            uploader = self._html_search_regex(
                (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*(?:Author|Artist)\s*</em>',
                 r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
                fatal=False)

        if len(formats) == 1:
            formats[0]['filesize'] = int_or_none(self._html_search_regex(
                r'"filesize"\s*:\s*["\']?([\d]+)["\']?,', webpage, 'filesize', default=None))

            # Kept under the single-format guard so formats[0] is known to exist.
            video_type_description = self._html_search_regex(
                r'"description"\s*:\s*["\']?([^"\']+)["\']?,', webpage, 'media type', default=None)
            if video_type_description == 'Audio File':
                formats[0]['vcodec'] = 'none'

        self._check_formats(formats, media_id)
        return {
            'id': media_id,
            'title': self._html_extract_title(webpage),
            'uploader': uploader,
            'timestamp': unified_timestamp(self._search_regex(
                r'itemprop="(?:uploadDate|datePublished)"\s+content="([^"]+)"',
                webpage, 'timestamp', default=None)),
            'duration': parse_duration(self._html_search_regex(
                r'"duration"\s*:\s*["\']?(\d+)["\']?', webpage, 'duration', default=None)),
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': (
                clean_html(get_element_by_id('author_comments', webpage))
                or self._og_search_description(webpage)),
            # Falls back to rating 'e' when no rating heading is found
            'age_limit': self._AGE_LIMIT.get(self._html_search_regex(
                r'<h2\s+class=["\']rated-([etma])["\']', webpage, 'age_limit', default='e')),
            'view_count': parse_count(self._html_search_regex(
                r'(?s)<dt>\s*(?:Views|Listens)\s*</dt>\s*<dd>([\d\.,]+)</dd>',
                webpage, 'view count', default=None)),
        }
216 | ||
217 | ||
class NewgroundsPlaylistIE(InfoExtractor):
    """Extractor for Newgrounds collection pages and search result listings."""

    IE_NAME = 'Newgrounds:playlist'
    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.newgrounds.com/collection/cats',
        'info_dict': {
            'id': 'cats',
            'title': 'Cats',
        },
        'playlist_mincount': 45,
    }, {
        'url': 'https://www.newgrounds.com/collection/dogs',
        'info_dict': {
            'id': 'dogs',
            'title': 'Dogs',
        },
        'playlist_mincount': 26,
    }, {
        'url': 'http://www.newgrounds.com/audio/search/title/cats',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect every portal/audio submission link on the page into a playlist.

        Only anchors whose ``class`` attribute is exactly
        ``item-portalsubmission`` or ``item-audiosubmission`` become entries.
        """
        playlist_id = self._match_id(url)
        full_page = self._download_webpage(url, playlist_id)

        # The title is taken from the whole page, before the menu is trimmed off
        playlist_title = self._html_extract_title(full_page, default=None)

        # cut left menu (falls back to the full page when the column is not found)
        listing_html = self._search_regex(
            r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
            full_page, 'wide column', default=full_page)

        submission_classes = ('item-portalsubmission', 'item-audiosubmission')
        anchor_matches = re.findall(
            r'(<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>)',
            listing_html)

        entries = [
            self.url_result(
                f'https://www.newgrounds.com/{path}',
                ie=NewgroundsIE.ie_key(), video_id=media_id)
            for anchor_tag, path, media_id in anchor_matches
            if extract_attributes(anchor_tag).get('class') in submission_classes
        ]

        return self.playlist_result(entries, playlist_id, playlist_title)
265 | ||
266 | ||
class NewgroundsUserIE(InfoExtractor):
    """Extractor for a user's ``movies`` or ``audio`` listing on their Newgrounds subdomain."""

    IE_NAME = 'Newgrounds:user'
    _VALID_URL = r'https?://(?P<id>[^\.]+)\.newgrounds\.com/(?:movies|audio)/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://burn7.newgrounds.com/audio',
        'info_dict': {
            'id': 'burn7',
        },
        'playlist_mincount': 150,
    }, {
        'url': 'https://burn7.newgrounds.com/movies',
        'info_dict': {
            'id': 'burn7',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://brian-beaton.newgrounds.com/movies',
        'info_dict': {
            'id': 'brian-beaton',
        },
        'playlist_mincount': 10,
    }]
    # Page size handed to OnDemandPagedList in _real_extract
    _PAGE_SIZE = 30

    def _fetch_page(self, channel_id, url, page):
        """Yield a ``url_result`` for every submission on one listing page.

        Args:
            channel_id: id used for the download progress messages.
            url: the listing URL the user supplied.
            page: page index as supplied by the paged list; it is incremented
                by one before being sent, so it is presumably zero-based.
        """
        page += 1
        # The page number goes through `query` rather than being appended as
        # '?page=N': _VALID_URL also accepts URLs that already end in a query
        # string or a fragment, where plain concatenation would produce a
        # malformed URL or hide the parameter inside the fragment.
        posts_info = self._download_json(
            url, channel_id,
            note=f'Downloading page {page}', headers={
                'Accept': 'application/json, text/javascript, */*; q = 0.01',
                'X-Requested-With': 'XMLHttpRequest',
            }, query={'page': page})
        # Walks two levels below 'items' and keeps only string values; each one
        # is searched for a submission link below.
        for post in traverse_obj(posts_info, ('items', ..., ..., {str})):
            path, media_id = self._search_regex(
                r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
                post, 'url', group=(1, 2))
            yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's submissions."""
        channel_id = self._match_id(url)

        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, channel_id, url), self._PAGE_SIZE)

        return self.playlist_result(entries, channel_id)