]>
Commit | Line | Data |
---|---|---|
bd2d82a5 PH |
1 | from __future__ import unicode_literals |
2 | ||
70e79672 S |
3 | import re |
4 | ||
eb03f4da | 5 | from .common import InfoExtractor |
70e79672 | 6 | from ..utils import ( |
139e10ad | 7 | ExtractorError, |
70e79672 S |
8 | extract_attributes, |
9 | int_or_none, | |
28a4d6cc S |
10 | parse_duration, |
11 | parse_filesize, | |
12 | unified_timestamp, | |
70e79672 | 13 | ) |
eb03f4da | 14 | |
d0ae9e3a | 15 | |
class NewgroundsIE(InfoExtractor):
    """Extractor for a single Newgrounds audio or portal submission.

    Handles ``/audio/listen/<id>`` and ``/portal/view/<id>`` URLs (see
    ``_VALID_URL``).
    """
    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.newgrounds.com/audio/listen/549479',
        'md5': 'fe6033d297591288fa1c1f780386f07a',
        'info_dict': {
            'id': '549479',
            'ext': 'mp3',
            'title': 'Burn7 - B7 - BusMode',
            'uploader': 'Burn7',
            'timestamp': 1378878540,
            'upload_date': '20130911',
            'duration': 143,
        },
    }, {
        'url': 'https://www.newgrounds.com/portal/view/1',
        'md5': 'fbfb40e2dc765a7e830cb251d370d981',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': 'Brian-Beaton - Scrotum 1',
            'uploader': 'Brian-Beaton',
            'timestamp': 955064100,
            'upload_date': '20000406',
        },
    }, {
        # source format unavailable, additional mp4 formats
        'url': 'http://www.newgrounds.com/portal/view/689400',
        'info_dict': {
            'id': '689400',
            'ext': 'mp4',
            'title': 'Bennettthesage - ZTV News Episode 8',
            'uploader': 'BennettTheSage',
            'timestamp': 1487965140,
            'upload_date': '20170224',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the submission at ``url``.

        Formats come from one of two places:

        * a ``"url": "..."`` entry embedded in the page, optionally
          expanded into 360p/720p/1080p variants up to the page's
          ``max_resolution``; or
        * when no such entry exists, the JSON document served at
          ``/portal/video/<movie id>``.

        Raises ExtractorError when the movie id cannot be found in the page
        or the JSON document cannot be fetched.
        """
        media_id = self._match_id(url)
        formats = []
        uploader = None
        webpage = self._download_webpage(url, media_id)

        title = self._html_search_regex(
            r'<title>([^>]+)</title>', webpage, 'title')

        # The capture keeps the surrounding quotes so the value can be
        # decoded as a JSON string below.
        media_url_string = self._search_regex(
            r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None)

        if media_url_string:
            media_url = self._parse_json(media_url_string, media_id)
            formats = [{
                'url': media_url,
                'format_id': 'source',
                'quality': 1,
            }]

            max_resolution = int_or_none(self._search_regex(
                r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
                default=None))
            if max_resolution:
                # Extra renditions share the source URL minus its extension.
                url_base = media_url.rpartition('.')[0]
                for resolution in (360, 720, 1080):
                    if resolution > max_resolution:
                        break
                    formats.append({
                        'url': '%s.%dp.mp4' % (url_base, resolution),
                        'format_id': '%dp' % resolution,
                        'height': resolution,
                    })
        else:
            # default=None lets a missing id reach the explicit error below
            # instead of failing inside the regex helper.
            video_id = int_or_none(self._search_regex(
                r'data-movie-id=\\"([0-9]+)\\"', webpage, 'video id',
                default=None))
            if not video_id:
                raise ExtractorError('Could not extract media data')

            url_video_data = 'https://www.newgrounds.com/portal/video/%s' % video_id
            headers = {
                'Accept': 'application/json',
                'Referer': url,
                'X-Requested-With': 'XMLHttpRequest',
            }
            json_video = self._download_json(
                url_video_data, video_id, headers=headers, fatal=False)
            if not json_video:
                raise ExtractorError('Could not fetch media data')

            uploader = json_video.get('author')
            # Keep the <title> value when the JSON carries no title, so the
            # final title never degrades to None.
            title = json_video.get('title') or title

            media_formats = json_video.get('sources')
            if not isinstance(media_formats, dict):
                media_formats = {}
            for media_format, media_sources in media_formats.items():
                for source in media_sources or []:
                    source_url = source.get('src')
                    if not source_url:
                        # A format without a URL cannot be checked or downloaded.
                        continue
                    formats.append({
                        'format_id': media_format,
                        # Drops the last character of the key before
                        # converting (presumably keys look like "720p" --
                        # verify against the site's JSON).
                        'quality': int_or_none(media_format[:-1]),
                        'url': source_url,
                    })

        self._check_formats(formats, media_id)
        self._sort_formats(formats)

        if not uploader:
            uploader = self._html_search_regex(
                (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*(?:Author|Artist)\s*</em>',
                 r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
                fatal=False)

        timestamp = unified_timestamp(self._html_search_regex(
            (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
             r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
            default=None))
        duration = parse_duration(self._search_regex(
            r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
            'duration', default=None))

        filesize_approx = parse_filesize(self._html_search_regex(
            r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
            default=None))
        # The page reports a single size, so it is only attached when there
        # is exactly one format.
        if len(formats) == 1:
            formats[0]['filesize_approx'] = filesize_approx

        if '<dd>Song' in webpage:
            formats[0]['vcodec'] = 'none'

        if uploader:
            title = "%s - %s" % (uploader, title)

        return {
            'id': media_id,
            'title': title,
            'uploader': uploader,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
        }
70e79672 S |
155 | |
156 | ||
class NewgroundsPlaylistIE(InfoExtractor):
    """Extractor for Newgrounds collection and search-result listings.

    Each matching submission link on the page becomes a URL entry that is
    delegated to NewgroundsIE.
    """
    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.newgrounds.com/collection/cats',
        'info_dict': {
            'id': 'cats',
            'title': 'Cats',
        },
        'playlist_mincount': 46,
    }, {
        'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA',
        'info_dict': {
            'id': 'ZONE-SAMA',
            'title': 'Portal Search: ZONE-SAMA',
        },
        'playlist_mincount': 47,
    }, {
        'url': 'http://www.newgrounds.com/audio/search/title/cats',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result listing the submissions linked from ``url``."""
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)

        playlist_title = self._search_regex(
            r'<title>([^>]+)</title>', page, 'title', default=None)

        # Keep only the wide column so links in the left menu are ignored;
        # fall back to the whole page when that column is not found.
        content = self._search_regex(
            r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
            page, 'wide column', default=page)

        # Only anchors whose class attribute is exactly one of these
        # count as submissions.
        submission_classes = ('item-portalsubmission', 'item-audiosubmission')
        anchors = re.findall(
            r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)',
            content)

        entries = [
            self.url_result(
                'https://www.newgrounds.com/%s' % path,
                ie=NewgroundsIE.ie_key(), video_id=media_id)
            for anchor, path, media_id in anchors
            if extract_attributes(anchor).get('class') in submission_classes
        ]

        return self.playlist_result(entries, playlist_id, playlist_title)
180 | ||
181 | webpage = self._download_webpage(url, playlist_id) | |
182 | ||
183 | title = self._search_regex( | |
184 | r'<title>([^>]+)</title>', webpage, 'title', default=None) | |
185 | ||
186 | # cut left menu | |
187 | webpage = self._search_regex( | |
188 | r'(?s)<div[^>]+\bclass=["\']column wide(.+)', | |
189 | webpage, 'wide column', default=webpage) | |
190 | ||
191 | entries = [] | |
192 | for a, path, media_id in re.findall( | |
193 | r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)', | |
194 | webpage): | |
195 | a_class = extract_attributes(a).get('class') | |
196 | if a_class not in ('item-portalsubmission', 'item-audiosubmission'): | |
197 | continue | |
198 | entries.append( | |
199 | self.url_result( | |
200 | 'https://www.newgrounds.com/%s' % path, | |
201 | ie=NewgroundsIE.ie_key(), video_id=media_id)) | |
202 | ||
203 | return self.playlist_result(entries, playlist_id, title) |