]>
Commit | Line | Data |
---|---|---|
02c7ae81 | 1 | # coding: utf-8 |
bd2d82a5 PH |
2 | from __future__ import unicode_literals |
3 | ||
02c7ae81 | 4 | import functools |
70e79672 S |
5 | import re |
6 | ||
eb03f4da | 7 | from .common import InfoExtractor |
70e79672 | 8 | from ..utils import ( |
2c5e8a96 | 9 | clean_html, |
70e79672 | 10 | extract_attributes, |
2c5e8a96 | 11 | get_element_by_id, |
70e79672 | 12 | int_or_none, |
36576d7c | 13 | parse_count, |
28a4d6cc | 14 | parse_duration, |
28a4d6cc | 15 | unified_timestamp, |
02c7ae81 | 16 | OnDemandPagedList, |
17 | try_get, | |
70e79672 | 18 | ) |
eb03f4da | 19 | |
d0ae9e3a | 20 | |
class NewgroundsIE(InfoExtractor):
    # Extractor for a single Newgrounds submission page: either an audio
    # track (/audio/listen/<id>) or a portal movie (/portal/view/<id>),
    # optionally suffixed with /format/flash.
    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>\d+)(?:/format/flash)?'
    _TESTS = [{
        'url': 'https://www.newgrounds.com/audio/listen/549479',
        'md5': 'fe6033d297591288fa1c1f780386f07a',
        'info_dict': {
            'id': '549479',
            'ext': 'mp3',
            'title': 'B7 - BusMode',
            'uploader': 'Burn7',
            'timestamp': 1378878540,
            'upload_date': '20130911',
            'duration': 143,
            'view_count': int,
            'description': 'md5:b8b3c2958875189f07d8e313462e8c4f',
        },
    }, {
        'url': 'https://www.newgrounds.com/portal/view/1',
        'md5': 'fbfb40e2dc765a7e830cb251d370d981',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': 'Scrotum 1',
            'uploader': 'Brian-Beaton',
            'timestamp': 955064100,
            'upload_date': '20000406',
            'view_count': int,
            'description': 'Scrotum plays "catch."',
            'age_limit': 17,
        },
    }, {
        # source format unavailable, additional mp4 formats
        'url': 'http://www.newgrounds.com/portal/view/689400',
        'info_dict': {
            'id': '689400',
            'ext': 'mp4',
            'title': 'ZTV News Episode 8',
            'uploader': 'ZONE-SAMA',
            'timestamp': 1487965140,
            'upload_date': '20170224',
            'view_count': int,
            'description': 'md5:aff9b330ec2e78ed93b1ad6d017accc6',
            'age_limit': 17,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.newgrounds.com/portal/view/297383',
        'md5': '2c11f5fd8cb6b433a63c89ba3141436c',
        'info_dict': {
            'id': '297383',
            'ext': 'mp4',
            'title': 'Metal Gear Awesome',
            'uploader': 'Egoraptor',
            'timestamp': 1140663240,
            'upload_date': '20060223',
            'view_count': int,
            'description': 'md5:9246c181614e23754571995104da92e0',
            'age_limit': 13,
        }
    }, {
        'url': 'https://www.newgrounds.com/portal/view/297383/format/flash',
        'md5': '5d05585a9a0caca059f5abfbd3865524',
        'info_dict': {
            'id': '297383',
            'ext': 'swf',
            'title': 'Metal Gear Awesome',
            'description': 'Metal Gear Awesome',
            'uploader': 'Egoraptor',
            'upload_date': '20060223',
            'timestamp': 1140663240,
            'age_limit': 13,
        }
    }]
    # Maps the single-letter suffix of the page's "rated-X" CSS class to the
    # numeric age limit reported in the info dict.
    _AGE_LIMIT = {
        'e': 0,
        't': 13,
        'm': 17,
        'a': 18,
    }

    def _real_extract(self, url):
        """Extract formats and metadata for one Newgrounds submission.

        The media URL embedded in the page (the JSON ``"url"`` key) is used
        as the single ``source`` format when present; otherwise the
        ``/portal/video/<id>`` JSON endpoint is queried for the list of
        encoded sources.  All remaining metadata is scraped from the HTML.

        :param url: page URL already matched against ``_VALID_URL``.
        :return: info dict with id, title, uploader, timestamp, duration,
            formats, thumbnail, description, age_limit and view_count.
        """
        media_id = self._match_id(url)
        formats = []
        uploader = None
        webpage = self._download_webpage(url, media_id)

        title = self._html_search_regex(
            r'<title>(.+?)</title>', webpage, 'title')

        # The captured group keeps its surrounding quotes so that it can be
        # decoded as a JSON string literal (un-escaping "\/" and friends).
        media_url_string = self._search_regex(
            r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None)

        if media_url_string:
            media_url = self._parse_json(media_url_string, media_id)
            formats = [{
                'url': media_url,
                'format_id': 'source',
                'quality': 1,
            }]
        else:
            json_video = self._download_json('https://www.newgrounds.com/portal/video/' + media_id, media_id, headers={
                'Accept': 'application/json',
                'Referer': url,
                'X-Requested-With': 'XMLHttpRequest'
            })

            uploader = json_video.get('author')
            # "sources" is keyed by a format label (presumably "720p",
            # "360p", ... — TODO confirm); tolerate a missing/null value.
            media_formats = json_video.get('sources') or {}
            for media_format, media_sources in media_formats.items():
                for source in media_sources or []:
                    source_url = source.get('src')
                    if not source_url:
                        # A format entry without a URL is unusable.
                        continue
                    formats.append({
                        'format_id': media_format,
                        # Drop the trailing character of the label and use
                        # the remaining number (if any) as the quality rank.
                        'quality': int_or_none(media_format[:-1]),
                        'url': source_url,
                    })

        if not uploader:
            uploader = self._html_search_regex(
                (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*(?:Author|Artist)\s*</em>',
                 r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
                fatal=False)

        # Pages without a rating header are treated as rated "e".
        age_limit = self._html_search_regex(
            r'<h2\s*class=["\']rated-([^"\'])["\'][^>]+>', webpage, 'age_limit', default='e')
        age_limit = self._AGE_LIMIT.get(age_limit)

        # First pattern captures both <dd> cells following "Uploaded";
        # the second falls back to the first cell only.
        timestamp = unified_timestamp(self._html_search_regex(
            (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
             r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
            default=None))

        duration = parse_duration(self._html_search_regex(
            r'"duration"\s*:\s*["\']?(\d+)["\']?', webpage,
            'duration', default=None))

        description = clean_html(get_element_by_id('author_comments', webpage)) or self._og_search_description(webpage)

        view_count = parse_count(self._html_search_regex(
            r'(?s)<dt>\s*(?:Views|Listens)\s*</dt>\s*<dd>([\d\.,]+)</dd>', webpage,
            'view count', default=None))

        filesize = int_or_none(self._html_search_regex(
            r'"filesize"\s*:\s*["\']?([\d]+)["\']?,', webpage, 'filesize',
            default=None))

        video_type_description = self._html_search_regex(
            r'"description"\s*:\s*["\']?([^"\']+)["\']?,', webpage, 'media type',
            default=None)

        # The page-level filesize only describes the media when there is
        # exactly one candidate format.
        if len(formats) == 1:
            formats[0]['filesize'] = filesize

        # Guard against an empty format list: indexing it would raise a bare
        # IndexError before the format checks below get a chance to run.
        if formats and video_type_description == 'Audio File':
            formats[0]['vcodec'] = 'none'
        self._check_formats(formats, media_id)
        self._sort_formats(formats)

        return {
            'id': media_id,
            'title': title,
            'uploader': uploader,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': description,
            'age_limit': age_limit,
            'view_count': view_count,
        }
70e79672 S |
193 | |
194 | ||
class NewgroundsPlaylistIE(InfoExtractor):
    # Extractor for Newgrounds collection pages and search result pages;
    # every matching submission link becomes a NewgroundsIE entry.
    IE_NAME = 'Newgrounds:playlist'
    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.newgrounds.com/collection/cats',
        'info_dict': {
            'id': 'cats',
            'title': 'Cats',
        },
        'playlist_mincount': 45,
    }, {
        'url': 'https://www.newgrounds.com/collection/dogs',
        'info_dict': {
            'id': 'dogs',
            'title': 'Dogs',
        },
        'playlist_mincount': 26,
    }, {
        'url': 'http://www.newgrounds.com/audio/search/title/cats',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist from the submission links on a listing page.

        The page title (if found) becomes the playlist title.  Only anchors
        whose ``class`` attribute is exactly ``item-portalsubmission`` or
        ``item-audiosubmission`` are turned into entries.
        """
        playlist_id = self._match_id(url)
        full_page = self._download_webpage(url, playlist_id)

        # The title is taken from the complete page, before trimming.
        playlist_title = self._search_regex(
            r'<title>([^>]+)</title>', full_page, 'title', default=None)

        # cut left menu: keep only what follows the "column wide" container,
        # falling back to the whole page when that container is absent.
        content = self._search_regex(
            r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
            full_page, 'wide column', default=full_page)

        submission_classes = ('item-portalsubmission', 'item-audiosubmission')
        link_matches = re.findall(
            r'(<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>)',
            content)

        entries = [
            self.url_result(
                f'https://www.newgrounds.com/{path}',
                ie=NewgroundsIE.ie_key(), video_id=media_id)
            for anchor_tag, path, media_id in link_matches
            if extract_attributes(anchor_tag).get('class') in submission_classes
        ]

        return self.playlist_result(entries, playlist_id, playlist_title)
02c7ae81 | 243 | |
244 | ||
class NewgroundsUserIE(InfoExtractor):
    # Extractor for a user's "movies" or "audio" listing hosted on the
    # <user>.newgrounds.com subdomain; entries are fetched lazily per page.
    IE_NAME = 'Newgrounds:user'
    _VALID_URL = r'https?://(?P<id>[^\.]+)\.newgrounds\.com/(?:movies|audio)/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://burn7.newgrounds.com/audio',
        'info_dict': {
            'id': 'burn7',
        },
        'playlist_mincount': 150,
    }, {
        'url': 'https://burn7.newgrounds.com/movies',
        'info_dict': {
            'id': 'burn7',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://brian-beaton.newgrounds.com/movies',
        'info_dict': {
            'id': 'brian-beaton',
        },
        'playlist_mincount': 10,
    }]
    # Page size handed to OnDemandPagedList.
    # NOTE(review): presumably matches the number of items the site returns
    # per page — confirm against the live endpoint.
    _PAGE_SIZE = 30

    def _fetch_page(self, channel_id, url, page):
        """Yield url_result entries for one page of a user's listing.

        :param channel_id: user name, used as the id for download messages.
        :param url: the listing URL as given by the user.
        :param page: zero-based page index; the site endpoint is one-based.
        """
        page += 1
        # _VALID_URL accepts a trailing slash, query string or fragment;
        # strip them so that "/page/N" is appended to the bare listing path.
        base_url = url.split('#', 1)[0].split('?', 1)[0].rstrip('/')
        posts_info = self._download_json(
            f'{base_url}/page/{page}', channel_id,
            note=f'Downloading page {page}', headers={
                'Accept': 'application/json, text/javascript, */*; q = 0.01',
                'X-Requested-With': 'XMLHttpRequest',
            })
        sequence = posts_info.get('sequence') or []
        for year in sequence:
            # try_get yields None when the year is missing from "years";
            # treat that as an empty year instead of iterating over None.
            posts = try_get(posts_info, lambda x: x['years'][str(year)]['items']) or []
            for post in posts:
                # Each item is an HTML snippet containing the submission link.
                path, media_id = self._search_regex(
                    r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
                    post, 'url', group=(1, 2))
                yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's submissions."""
        channel_id = self._match_id(url)

        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, channel_id, url), self._PAGE_SIZE)

        return self.playlist_result(entries, channel_id)