]>
Commit | Line | Data |
---|---|---|
d0390a0c PH |
1 | from __future__ import unicode_literals |
2 | ||
dd91dfcd | 3 | import base64 |
9c250931 | 4 | import functools |
e6da9240 | 5 | import itertools |
80cbb6dd | 6 | import re |
80cbb6dd PH |
7 | |
8 | from .common import InfoExtractor | |
c96eca42 | 9 | from ..compat import ( |
dd91dfcd YCH |
10 | compat_chr, |
11 | compat_ord, | |
c96eca42 | 12 | compat_urllib_parse_unquote, |
9c250931 | 13 | compat_urlparse, |
c96eca42 | 14 | ) |
1cc79574 | 15 | from ..utils import ( |
9c250931 | 16 | clean_html, |
baa7b197 | 17 | ExtractorError, |
9c250931 | 18 | OnDemandPagedList, |
7f4173ae | 19 | parse_count, |
b80505a4 | 20 | str_to_int, |
80cbb6dd PH |
21 | ) |
22 | ||
23 | ||
class MixcloudIE(InfoExtractor):
    """Extractor for a single Mixcloud cloudcast page.

    Matches URLs of the form ``mixcloud.com/<uploader>/<cloudcast>``, except
    those whose second path component starts with one of the listing names
    (stream, uploads, favorites, listens, playlists).
    """
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
    IE_NAME = 'mixcloud'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
        'info_dict': {
            'id': 'dholbach-cryptkeeper',
            'ext': 'm4a',
            'title': 'Cryptkeeper',
            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            'uploader': 'Daniel Holbach',
            'uploader_id': 'dholbach',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg',
            'view_count': int,
            'like_count': int,
        },
    }, {
        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
        'info_dict': {
            'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
            'ext': 'mp3',
            'title': 'Caribou 7 inch Vinyl Mix & Chat',
            'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
            'uploader': 'Gilles Peterson Worldwide',
            'uploader_id': 'gillespeterson',
            'thumbnail': 're:https?://.*',
            'view_count': int,
            'like_count': int,
        },
    }]

    # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
    @staticmethod
    def _decrypt_play_info(play_info):
        """Decode the ``m-play-info`` attribute value.

        The value is base64-decoded and every resulting byte is XORed with
        the corresponding character of the repeating KEY; the decoded
        characters are joined and returned as a string.
        """
        KEY = 'pleasedontdownloadourmusictheartistswontgetpaid'

        play_info = base64.b64decode(play_info.encode('ascii'))

        return ''.join([
            compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)]))
            for idx, ch in enumerate(play_info)])

    def _real_extract(self, url):
        """Download the cloudcast page and build its info dict.

        Raises ExtractorError when the decrypted play info carries no stream
        URL; if the page shows a "disabled" notice, that notice is used as
        the (expected) error message.
        """
        mobj = re.match(self._VALID_URL, url)
        uploader_slug = mobj.group(1)
        cloudcast_name = mobj.group(2)
        track_id = compat_urllib_parse_unquote('-'.join((uploader_slug, cloudcast_name)))

        webpage = self._download_webpage(url, track_id)

        # Optional notice the site displays for disabled cloudcasts.
        message = self._html_search_regex(
            r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
            webpage, 'error message', default=None)

        encrypted_play_info = self._search_regex(
            r'm-play-info="([^"]+)"', webpage, 'play info')
        play_info = self._parse_json(
            self._decrypt_play_info(encrypted_play_info), track_id)

        song_url = play_info.get('stream_url')
        if not song_url:
            # Prefer the site's own explanation; otherwise fail with a clear
            # extractor error instead of a bare KeyError.
            if message:
                raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
            raise ExtractorError('Unable to extract stream URL')

        # Common prefix locating the attribute list that holds the metadata.
        PREFIX = (
            r'm-play-on-spacebar[^>]+'
            r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
        title = self._html_search_regex(
            PREFIX + r'm-title="([^"]+)"', webpage, 'title')
        thumbnail = self._proto_relative_url(self._html_search_regex(
            PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
            fatal=False))
        uploader = self._html_search_regex(
            PREFIX + r'm-owner-name="([^"]+)"',
            webpage, 'uploader', fatal=False)
        uploader_id = self._search_regex(
            r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
        description = self._og_search_description(webpage)
        like_count = parse_count(self._search_regex(
            r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
            webpage, 'like count', fatal=False))
        view_count = str_to_int(self._search_regex(
            [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
             r'/listeners/?">([0-9,.]+)</a>'],
            webpage, 'play count', fatal=False))

        return {
            'id': track_id,
            'title': title,
            'url': song_url,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'like_count': like_count,
        }
c96eca42 PH |
122 | |
123 | ||
9c250931 YCH |
class MixcloudPlaylistBaseIE(InfoExtractor):
    """Shared helpers for the Mixcloud listing extractors."""
    _PAGE_SIZE = 24

    def _find_urls_in_page(self, page):
        """Yield a url_result (handled by MixcloudIE) per play button in page."""
        for match in re.finditer(r'm-play-button m-url="(?P<url>[^"]+)"', page):
            absolute_url = compat_urlparse.urljoin(
                'https://www.mixcloud.com', clean_html(match.group('url')))
            yield self.url_result(absolute_url, MixcloudIE.ie_key())

    def _fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None):
        """Download one AJAX listing page for the given site path.

        current_page is zero-based and used for the progress note; the page
        requested from the site is real_page_number when given (and truthy),
        otherwise current_page + 1.
        """
        if not real_page_number:
            real_page_number = current_page + 1
        listing_url = 'https://www.mixcloud.com/%s/' % path
        ajax_query = {'page': real_page_number, 'list': 'main', '_ajax': '1'}
        return self._download_webpage(
            listing_url, video_id,
            note='Download %s (page %d)' % (page_name, current_page + 1),
            errnote='Unable to download %s' % page_name,
            query=ajax_query,
            headers={'X-Requested-With': 'XMLHttpRequest'})

    def _tracks_page_func(self, page, video_id, page_name, current_page):
        """Generate the entries of one listing page (``page`` is the site path)."""
        listing_html = self._fetch_tracks_page(
            page, video_id, page_name, current_page)

        for entry in self._find_urls_in_page(listing_html):
            yield entry

    def _get_user_description(self, page_content):
        """Return the text of the description-text div, if it can be found."""
        description_re = r'<div[^>]+class="description-text"[^>]*>(.+?)</div>'
        return self._html_search_regex(
            description_re, page_content, 'user description', fatal=False)
152 | ||
153 | ||
class MixcloudUserIE(MixcloudPlaylistBaseIE):
    """Extractor for a user's uploads, favorites or listens listing."""
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
    IE_NAME = 'mixcloud:user'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:327af72d1efeb404a8216c27240d1370',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/uploads/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:327af72d1efeb404a8216c27240d1370',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/favorites/',
        'info_dict': {
            'id': 'dholbach_favorites',
            'title': 'Daniel Holbach (favorites)',
            'description': 'md5:327af72d1efeb404a8216c27240d1370',
        },
        'params': {
            'playlist_items': '1-100',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/listens/',
        'info_dict': {
            'id': 'dholbach_listens',
            'title': 'Daniel Holbach (listens)',
            'description': 'md5:327af72d1efeb404a8216c27240d1370',
        },
        'params': {
            'playlist_items': '1-100',
        },
        'playlist_mincount': 100,
    }]

    def _real_extract(self, url):
        """Return a paged playlist for the requested listing of a user."""
        user_id, list_type = re.match(self._VALID_URL, url).group('user', 'type')

        # A bare profile URL carries no listing type: fall back to uploads.
        list_type = list_type or 'uploads'

        video_id = '%s_%s' % (user_id, list_type)

        profile_url = 'https://www.mixcloud.com/%s/' % user_id
        profile = self._download_webpage(
            profile_url, video_id,
            note='Downloading user profile',
            errnote='Unable to download user profile')

        username = self._og_search_title(profile)
        description = self._get_user_description(profile)

        # Pages are fetched lazily, one listing page per request.
        fetch_page = functools.partial(
            self._tracks_page_func,
            '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type)
        entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE, use_cache=True)

        playlist_title = '%s (%s)' % (username, list_type)
        return self.playlist_result(
            entries, video_id, playlist_title, description)
c96eca42 | 225 | |
c96eca42 | 226 | |
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
    """Extractor for a user's named playlist."""
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
    IE_NAME = 'mixcloud:playlist'

    _TESTS = [{
        'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/',
        'info_dict': {
            'id': 'RedBullThre3style_tokyo-finalists-2015',
            'title': 'National Champions 2015',
            'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
        'info_dict': {
            'id': 'maxvibes_jazzcat-on-ness-radio',
            'title': 'Jazzcat on Ness Radio',
            'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
        },
        'playlist_mincount': 23
    }]

    def _real_extract(self, url):
        """Return a paged playlist for the playlist page at url."""
        mobj = re.match(self._VALID_URL, url)
        user_id = mobj.group('user')
        playlist_id = mobj.group('playlist')
        video_id = '%s_%s' % (user_id, playlist_id)

        # Label the request with the playlist's own id (not just the user),
        # matching the id used for the track page requests below.
        profile = self._download_webpage(
            url, video_id,
            note='Downloading playlist page',
            errnote='Unable to download playlist page')

        description = self._get_user_description(profile)
        playlist_title = self._html_search_regex(
            r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>',
            profile, 'playlist title')

        # Track pages are fetched lazily, one listing page per request.
        entries = OnDemandPagedList(
            functools.partial(
                self._tracks_page_func,
                '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
            self._PAGE_SIZE)

        return self.playlist_result(entries, video_id, playlist_title, description)
e6da9240 YCH |
272 | |
273 | ||
class MixcloudStreamIE(MixcloudPlaylistBaseIE):
    """Extractor for a user's stream listing."""
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
    IE_NAME = 'mixcloud:stream'

    _TEST = {
        'url': 'https://www.mixcloud.com/FirstEar/stream/',
        'info_dict': {
            'id': 'FirstEar',
            'title': 'First Ear',
            'description': 'Curators of good music\nfirstearmusic.com',
        },
        'playlist_mincount': 192,
    }

    def _real_extract(self, url):
        """Follow the stream's next-page links and return all entries."""
        user_id = self._match_id(url)

        webpage = self._download_webpage(url, user_id)

        collected = []

        def _collect(html):
            # Store this page's entries and report where the next page is.
            collected.extend(self._find_urls_in_page(html))
            return self._search_regex(
                r'm-next-page-url="([^"]+)"', html,
                'next page URL', default=None)

        upcoming_url = _collect(webpage)
        visited_url = None
        fetched_pages = 0

        # Stop when there is no next page, or when the site repeats the
        # same next-page URL it gave on the previous page.
        while upcoming_url and upcoming_url != visited_url:
            visited_url = upcoming_url
            page_number = int(self._search_regex(
                r'\?page=(\d+)', upcoming_url, 'next page number'))

            listing_html = self._fetch_tracks_page(
                '%s/stream' % user_id, user_id, 'stream', fetched_pages,
                real_page_number=page_number)
            upcoming_url = _collect(listing_html)
            fetched_pages += 1

        username = self._og_search_title(webpage)
        description = self._get_user_description(webpage)

        return self.playlist_result(collected, user_id, username, description)