]>
Commit | Line | Data |
---|---|---|
02c7ae81 | 1 | import functools |
70e79672 S |
2 | import re |
3 | ||
eb03f4da | 4 | from .common import InfoExtractor |
70e79672 | 5 | from ..utils import ( |
2c5e8a96 | 6 | clean_html, |
70e79672 | 7 | extract_attributes, |
2c5e8a96 | 8 | get_element_by_id, |
70e79672 | 9 | int_or_none, |
36576d7c | 10 | parse_count, |
28a4d6cc | 11 | parse_duration, |
28a4d6cc | 12 | unified_timestamp, |
02c7ae81 | 13 | OnDemandPagedList, |
14 | try_get, | |
70e79672 | 15 | ) |
eb03f4da | 16 | |
d0ae9e3a | 17 | |
class NewgroundsIE(InfoExtractor):
    """Extractor for a single Newgrounds audio (``audio/listen``) or portal (``portal/view``) page."""

    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>\d+)(?:/format/flash)?'
    _TESTS = [{
        'url': 'https://www.newgrounds.com/audio/listen/549479',
        'md5': 'fe6033d297591288fa1c1f780386f07a',
        'info_dict': {
            'id': '549479',
            'ext': 'mp3',
            'title': 'B7 - BusMode',
            'uploader': 'Burn7',
            'timestamp': 1378878540,
            'upload_date': '20130911',
            'duration': 143,
            'view_count': int,
            'description': 'md5:b8b3c2958875189f07d8e313462e8c4f',
        },
    }, {
        'url': 'https://www.newgrounds.com/portal/view/1',
        'md5': 'fbfb40e2dc765a7e830cb251d370d981',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': 'Scrotum 1',
            'uploader': 'Brian-Beaton',
            'timestamp': 955064100,
            'upload_date': '20000406',
            'view_count': int,
            'description': 'Scrotum plays "catch."',
            'age_limit': 17,
        },
    }, {
        # source format unavailable, additional mp4 formats
        'url': 'http://www.newgrounds.com/portal/view/689400',
        'info_dict': {
            'id': '689400',
            'ext': 'mp4',
            'title': 'ZTV News Episode 8',
            'uploader': 'ZONE-SAMA',
            'timestamp': 1487965140,
            'upload_date': '20170224',
            'view_count': int,
            'description': 'md5:aff9b330ec2e78ed93b1ad6d017accc6',
            'age_limit': 17,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.newgrounds.com/portal/view/297383',
        'md5': '2c11f5fd8cb6b433a63c89ba3141436c',
        'info_dict': {
            'id': '297383',
            'ext': 'mp4',
            'title': 'Metal Gear Awesome',
            'uploader': 'Egoraptor',
            'timestamp': 1140663240,
            'upload_date': '20060223',
            'view_count': int,
            'description': 'md5:9246c181614e23754571995104da92e0',
            'age_limit': 13,
        }
    }, {
        'url': 'https://www.newgrounds.com/portal/view/297383/format/flash',
        'md5': '5d05585a9a0caca059f5abfbd3865524',
        'info_dict': {
            'id': '297383',
            'ext': 'swf',
            'title': 'Metal Gear Awesome',
            'description': 'Metal Gear Awesome',
            'uploader': 'Egoraptor',
            'upload_date': '20060223',
            'timestamp': 1140663240,
            'age_limit': 13,
        }
    }]
    # Maps the single-letter suffix of the page's "rated-X" CSS class to an age limit.
    _AGE_LIMIT = {
        'e': 0,
        't': 13,
        'm': 17,
        'a': 18,
    }

    def _real_extract(self, url):
        """Build the info dict for one submission.

        The media URL embedded in the page is used when present; otherwise
        the formats are read from the ``portal/video/<id>`` JSON endpoint.
        All remaining metadata is scraped from the page HTML.
        """
        media_id = self._match_id(url)
        formats = []
        uploader = None
        webpage = self._download_webpage(url, media_id)

        title = self._html_extract_title(webpage)

        media_url_string = self._search_regex(
            r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None)

        if media_url_string:
            # The capture keeps its surrounding quotes so that it can be
            # decoded as a JSON string literal.
            media_url = self._parse_json(media_url_string, media_id)
            formats = [{
                'url': media_url,
                'format_id': 'source',
                'quality': 1,
            }]
        else:
            json_video = self._download_json('https://www.newgrounds.com/portal/video/' + media_id, media_id, headers={
                'Accept': 'application/json',
                'Referer': url,
                'X-Requested-With': 'XMLHttpRequest'
            })

            uploader = json_video.get('author')
            # `sources` maps a format name to a list of source dicts; guard
            # against the key being absent or explicitly null.
            for format_id, media_sources in (json_video.get('sources') or {}).items():
                # The format name minus its last character is used as the quality.
                quality = int_or_none(format_id[:-1])
                for source in media_sources or []:
                    source_url = source.get('src')
                    if not source_url:
                        # A format without a URL is unusable; skip it.
                        continue
                    formats.append({
                        'format_id': format_id,
                        'quality': quality,
                        'url': source_url,
                    })

        if not uploader:
            uploader = self._html_search_regex(
                (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*(?:Author|Artist)\s*</em>',
                 r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
                fatal=False)

        age_limit = self._html_search_regex(
            r'<h2\s*class=["\']rated-([^"\'])["\'][^>]+>', webpage, 'age_limit', default='e')
        age_limit = self._AGE_LIMIT.get(age_limit)

        timestamp = unified_timestamp(self._html_search_regex(
            (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
             r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
            default=None))

        duration = parse_duration(self._html_search_regex(
            r'"duration"\s*:\s*["\']?(\d+)["\']?', webpage,
            'duration', default=None))

        description = clean_html(get_element_by_id('author_comments', webpage)) or self._og_search_description(webpage)

        view_count = parse_count(self._html_search_regex(
            r'(?s)<dt>\s*(?:Views|Listens)\s*</dt>\s*<dd>([\d\.,]+)</dd>', webpage,
            'view count', default=None))

        filesize = int_or_none(self._html_search_regex(
            r'"filesize"\s*:\s*["\']?([\d]+)["\']?,', webpage, 'filesize',
            default=None))

        video_type_description = self._html_search_regex(
            r'"description"\s*:\s*["\']?([^"\']+)["\']?,', webpage, 'media type',
            default=None)

        # The page exposes one filesize, so it is only attached when there is
        # exactly one format.
        if len(formats) == 1:
            formats[0]['filesize'] = filesize

        # Guard on `formats`: indexing an empty list would raise IndexError.
        if formats and video_type_description == 'Audio File':
            formats[0]['vcodec'] = 'none'
        self._check_formats(formats, media_id)

        return {
            'id': media_id,
            'title': title,
            'uploader': uploader,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': description,
            'age_limit': age_limit,
            'view_count': view_count,
        }
70e79672 S |
188 | |
189 | ||
class NewgroundsPlaylistIE(InfoExtractor):
    """Extractor for Newgrounds collection and search result pages."""

    IE_NAME = 'Newgrounds:playlist'
    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.newgrounds.com/collection/cats',
        'info_dict': {
            'id': 'cats',
            'title': 'Cats',
        },
        'playlist_mincount': 45,
    }, {
        'url': 'https://www.newgrounds.com/collection/dogs',
        'info_dict': {
            'id': 'dogs',
            'title': 'Dogs',
        },
        'playlist_mincount': 26,
    }, {
        'url': 'http://www.newgrounds.com/audio/search/title/cats',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect every submission link on the page into a playlist result."""
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)
        playlist_title = self._html_extract_title(page, default=None)

        # cut left menu; fall back to the whole page if the column is not found
        content = self._search_regex(
            r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
            page, 'wide column', default=page)

        # Each match is (full <a> tag, relative path, numeric media id).
        anchors = re.findall(
            r'(<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>)',
            content)

        # Only anchors whose class is exactly one of the submission classes are kept.
        entries = [
            self.url_result(
                f'https://www.newgrounds.com/{path}',
                ie=NewgroundsIE.ie_key(), video_id=media_id)
            for anchor, path, media_id in anchors
            if extract_attributes(anchor).get('class') in ('item-portalsubmission', 'item-audiosubmission')
        ]

        return self.playlist_result(entries, playlist_id, playlist_title)
02c7ae81 | 237 | |
238 | ||
class NewgroundsUserIE(InfoExtractor):
    """Extractor for a Newgrounds user's ``movies`` or ``audio`` listing."""

    IE_NAME = 'Newgrounds:user'
    _VALID_URL = r'https?://(?P<id>[^\.]+)\.newgrounds\.com/(?:movies|audio)/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://burn7.newgrounds.com/audio',
        'info_dict': {
            'id': 'burn7',
        },
        'playlist_mincount': 150,
    }, {
        'url': 'https://burn7.newgrounds.com/movies',
        'info_dict': {
            'id': 'burn7',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://brian-beaton.newgrounds.com/movies',
        'info_dict': {
            'id': 'brian-beaton',
        },
        'playlist_mincount': 10,
    }]
    _PAGE_SIZE = 30

    def _fetch_page(self, channel_id, url, page):
        """Yield a url_result for every submission on one listing page.

        ``page`` is zero-based as passed in; the request uses ``page + 1``.
        """
        page += 1
        posts_info = self._download_json(
            f'{url}/page/{page}', channel_id,
            note=f'Downloading page {page}', headers={
                'Accept': 'application/json, text/javascript, */*; q = 0.01',
                'X-Requested-With': 'XMLHttpRequest',
            })
        # `sequence` lists the years in order; tolerate a missing or null value.
        for year in posts_info.get('sequence') or []:
            # try_get returns None when the nested lookup fails, so fall back
            # to an empty list rather than iterating over None.
            posts = try_get(posts_info, lambda x: x['years'][str(year)]['items']) or []
            for post in posts:
                path, media_id = self._search_regex(
                    r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
                    post, 'url', group=(1, 2))
                yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's submissions."""
        channel_id = self._match_id(url)

        # _VALID_URL accepts a trailing slash, query string or fragment; strip
        # them so that appending "/page/N" produces a well-formed URL.
        base_url = url.partition('#')[0].partition('?')[0].rstrip('/')

        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, channel_id, base_url), self._PAGE_SIZE)

        return self.playlist_result(entries, channel_id)