]>
Commit | Line | Data |
---|---|---|
d0390a0c PH |
1 | from __future__ import unicode_literals |
2 | ||
9c250931 | 3 | import functools |
80cbb6dd | 4 | import re |
80cbb6dd PH |
5 | |
6 | from .common import InfoExtractor | |
c96eca42 PH |
7 | from ..compat import ( |
8 | compat_urllib_parse_unquote, | |
9c250931 | 9 | compat_urlparse, |
c96eca42 | 10 | ) |
1cc79574 | 11 | from ..utils import ( |
9c250931 | 12 | clean_html, |
baa7b197 | 13 | ExtractorError, |
dbc1366b | 14 | HEADRequest, |
9c250931 | 15 | OnDemandPagedList, |
49f523ca | 16 | NO_DEFAULT, |
7f4173ae | 17 | parse_count, |
b80505a4 | 18 | str_to_int, |
80cbb6dd PH |
19 | ) |
20 | ||
21 | ||
class MixcloudIE(InfoExtractor):
    """Extractor for a single Mixcloud cloudcast (``mixcloud.com/<user>/<cloudcast>``)."""

    # Group 1 is the uploader slug, group 2 the cloudcast slug.  The negative
    # lookahead keeps the listing pages (stream/uploads/favorites/listens/
    # playlists) out of this extractor, but only when the reserved word is the
    # *entire* path segment, so a cloudcast whose slug merely starts with one
    # of those words (e.g. "listens-to-jazz") is still accepted.
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!(?:stream|uploads|favorites|listens|playlists)(?:[/?#]|$))([^/]+)'
    IE_NAME = 'mixcloud'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
        'info_dict': {
            'id': 'dholbach-cryptkeeper',
            'ext': 'm4a',
            'title': 'Cryptkeeper',
            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            'uploader': 'Daniel Holbach',
            'uploader_id': 'dholbach',
            # Raw string: "\." is not a valid escape in a plain string literal.
            'thumbnail': r're:https?://.*\.jpg',
            'view_count': int,
            'like_count': int,
        },
    }, {
        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
        'info_dict': {
            'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
            'ext': 'mp3',
            'title': 'Caribou 7 inch Vinyl Mix & Chat',
            'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
            'uploader': 'Gilles Peterson Worldwide',
            'uploader_id': 'gillespeterson',
            'thumbnail': 're:https?://.*/images/',
            'view_count': int,
            'like_count': int,
        },
    }]

    def _check_url(self, url, track_id, ext):
        """Return True if a HEAD request for ``url`` succeeds, False otherwise.

        ``track_id`` is passed to ``_request_webpage`` as the video id and
        ``ext`` only appears in the progress note ('Trying <ext> URL').
        An ``ExtractorError`` from the request is swallowed and reported as
        False.
        """
        try:
            # We only want to know if the request succeeds;
            # don't download the whole file.
            self._request_webpage(
                HEADRequest(url), track_id,
                'Trying %s URL' % ext)
            return True
        except ExtractorError:
            return False

    def _real_extract(self, url):
        """Download the cloudcast page at ``url`` and build its info dict.

        Returns a dict with ``id``, ``title``, ``url``, ``description``,
        ``thumbnail``, ``uploader``, ``uploader_id``, ``view_count`` and
        ``like_count``.

        Raises ``ExtractorError`` (with ``expected=True``) when the page
        carries a "cloudcast disabled" notice, and a plain ``ExtractorError``
        when neither the mp3 nor the m4a candidate URL answers a HEAD request.
        """
        mobj = re.match(self._VALID_URL, url)
        uploader_slug = mobj.group(1)
        cloudcast_name = mobj.group(2)
        track_id = compat_urllib_parse_unquote('-'.join((uploader_slug, cloudcast_name)))

        webpage = self._download_webpage(url, track_id)

        message = self._html_search_regex(
            r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
            webpage, 'error message', default=None)

        # When the page shows a notice, a missing preview URL must not mask
        # it, so the search only becomes fatal (NO_DEFAULT) without a notice.
        preview_url = self._search_regex(
            r'\s(?:data-preview-url|m-preview)="([^"]+)"',
            webpage, 'preview url', default=None if message else NO_DEFAULT)

        if message:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)

        # Derive the full-track URL from the preview URL: first try the
        # original mp3, then fall back to the 64k m4a variant.
        song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url)
        song_url = song_url.replace('/previews/', '/c/originals/')
        if not self._check_url(song_url, track_id, 'mp3'):
            song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
            if not self._check_url(song_url, track_id, 'm4a'):
                raise ExtractorError('Unable to extract track url')

        # Common regex prefix: the element marked "m-play-on-spacebar",
        # followed by any number of attributes before the one being captured.
        PREFIX = (
            r'm-play-on-spacebar[^>]+'
            r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
        title = self._html_search_regex(
            PREFIX + r'm-title="([^"]+)"', webpage, 'title')
        thumbnail = self._proto_relative_url(self._html_search_regex(
            PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
            fatal=False))
        uploader = self._html_search_regex(
            PREFIX + r'm-owner-name="([^"]+)"',
            webpage, 'uploader', fatal=False)
        uploader_id = self._search_regex(
            r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
        description = self._og_search_description(webpage)
        like_count = parse_count(self._search_regex(
            r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
            webpage, 'like count', fatal=False))
        view_count = str_to_int(self._search_regex(
            [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
             r'/listeners/?">([0-9,.]+)</a>'],
            webpage, 'play count', fatal=False))

        return {
            'id': track_id,
            'title': title,
            'url': song_url,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'like_count': like_count,
        }
c96eca42 PH |
124 | |
125 | ||
9c250931 YCH |
class MixcloudPlaylistBaseIE(InfoExtractor):
    """Helpers shared by the Mixcloud extractors that return track lists."""

    # Page size that subclasses hand to OnDemandPagedList.
    _PAGE_SIZE = 24

    def _fetch_tracks_page(self, path, video_id, page_name, current_page):
        """Yield one ``url_result`` per track link found on a listing page.

        ``path`` is appended to ``https://www.mixcloud.com/`` to form the
        listing URL, ``page_name`` is only used in the progress/error notes,
        and ``current_page`` is zero-based (the request asks for
        ``current_page + 1``).
        """
        page_number = current_page + 1
        listing_html = self._download_webpage(
            'https://www.mixcloud.com/%s/' % path, video_id,
            note='Download %s (page %d)' % (page_name, page_number),
            errnote='Unable to download %s' % page_name,
            query={'page': page_number, 'list': 'main', '_ajax': '1'},
            headers={'X-Requested-With': 'XMLHttpRequest'})

        track_link_re = re.compile(r'm-play-button m-url="(?P<url>[^"]+)"')
        for link in track_link_re.finditer(listing_html):
            # The captured attribute is passed through clean_html and then
            # resolved against the site root.
            track_url = compat_urlparse.urljoin(
                'https://www.mixcloud.com', clean_html(link.group('url')))
            yield self.url_result(track_url, MixcloudIE.ie_key())

    def _get_user_description(self, page_content):
        """Return the text of the ``description-text`` div (non-fatal search)."""
        description_re = r'<div[^>]+class="description-text"[^>]*>(.+?)</div>'
        return self._html_search_regex(
            description_re, page_content, 'user description', fatal=False)
146 | ||
147 | ||
class MixcloudUserIE(MixcloudPlaylistBaseIE):
    """Extractor for a user's uploads, favorites or listens listing."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
    IE_NAME = 'mixcloud:user'

    _TESTS = [
        {
            'url': 'http://www.mixcloud.com/dholbach/',
            'info_dict': {
                'id': 'dholbach_uploads',
                'title': 'Daniel Holbach (uploads)',
                'description': 'md5:327af72d1efeb404a8216c27240d1370',
            },
            'playlist_mincount': 11,
        },
        {
            'url': 'http://www.mixcloud.com/dholbach/uploads/',
            'info_dict': {
                'id': 'dholbach_uploads',
                'title': 'Daniel Holbach (uploads)',
                'description': 'md5:327af72d1efeb404a8216c27240d1370',
            },
            'playlist_mincount': 11,
        },
        {
            'url': 'http://www.mixcloud.com/dholbach/favorites/',
            'info_dict': {
                'id': 'dholbach_favorites',
                'title': 'Daniel Holbach (favorites)',
                'description': 'md5:327af72d1efeb404a8216c27240d1370',
            },
            'params': {
                'playlist_items': '1-100',
            },
            'playlist_mincount': 100,
        },
        {
            'url': 'http://www.mixcloud.com/dholbach/listens/',
            'info_dict': {
                'id': 'dholbach_listens',
                'title': 'Daniel Holbach (listens)',
                'description': 'md5:327af72d1efeb404a8216c27240d1370',
            },
            'params': {
                'playlist_items': '1-100',
            },
            'playlist_mincount': 100,
        },
    ]

    def _real_extract(self, url):
        """Build a playlist result for the listing addressed by ``url``.

        The playlist id is ``<user>_<list type>`` and the title is
        ``<og:title of the profile> (<list type>)``; entries are fetched
        page by page through ``_fetch_tracks_page``.
        """
        url_match = re.match(self._VALID_URL, url)
        user_id = url_match.group('user')

        # A bare profile URL names no list; fall back to the user's uploads.
        requested_type = url_match.group('type')
        list_type = 'uploads' if requested_type is None else requested_type

        video_id = '%s_%s' % (user_id, list_type)

        profile_page = self._download_webpage(
            'https://www.mixcloud.com/%s/' % user_id, video_id,
            note='Downloading user profile',
            errnote='Unable to download user profile')

        username = self._og_search_title(profile_page)
        description = self._get_user_description(profile_page)

        # Bind everything except the page index; OnDemandPagedList supplies it.
        fetch_page = functools.partial(
            self._fetch_tracks_page,
            '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type)
        entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE, use_cache=True)

        playlist_title = '%s (%s)' % (username, list_type)
        return self.playlist_result(
            entries, video_id, playlist_title, description)
c96eca42 | 219 | |
c96eca42 | 220 | |
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
    """Extractor for a named playlist (``mixcloud.com/<user>/playlists/<name>``)."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
    IE_NAME = 'mixcloud:playlist'

    _TESTS = [
        {
            'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/',
            'info_dict': {
                'id': 'RedBullThre3style_tokyo-finalists-2015',
                'title': 'National Champions 2015',
                'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3',
            },
            'playlist_mincount': 16,
        },
        {
            'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
            'info_dict': {
                'id': 'maxvibes_jazzcat-on-ness-radio',
                'title': 'Jazzcat on Ness Radio',
                'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
            },
            'playlist_mincount': 23,
        },
    ]

    def _real_extract(self, url):
        """Build a playlist result for the playlist page at ``url``.

        The playlist id is ``<user>_<playlist slug>``; the title comes from
        the ``list-playlist-title`` span on the page, and entries are fetched
        page by page through ``_fetch_tracks_page``.
        """
        url_match = re.match(self._VALID_URL, url)
        user_id = url_match.group('user')
        playlist_id = url_match.group('playlist')
        video_id = '%s_%s' % (user_id, playlist_id)

        playlist_page = self._download_webpage(
            url, user_id,
            note='Downloading playlist page',
            errnote='Unable to download playlist page')

        description = self._get_user_description(playlist_page)
        title_re = r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>'
        playlist_title = self._html_search_regex(
            title_re, playlist_page, 'playlist title')

        # Bind everything except the page index; OnDemandPagedList supplies it.
        fetch_page = functools.partial(
            self._fetch_tracks_page,
            '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist')
        entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)

        return self.playlist_result(entries, video_id, playlist_title, description)