]>
Commit | Line | Data |
---|---|---|
d0390a0c PH |
1 | from __future__ import unicode_literals |
2 | ||
dd91dfcd | 3 | import base64 |
9c250931 | 4 | import functools |
e6da9240 | 5 | import itertools |
80cbb6dd | 6 | import re |
80cbb6dd PH |
7 | |
8 | from .common import InfoExtractor | |
c96eca42 | 9 | from ..compat import ( |
dd91dfcd YCH |
10 | compat_chr, |
11 | compat_ord, | |
c96eca42 | 12 | compat_urllib_parse_unquote, |
9c250931 | 13 | compat_urlparse, |
c96eca42 | 14 | ) |
1cc79574 | 15 | from ..utils import ( |
9c250931 | 16 | clean_html, |
baa7b197 | 17 | ExtractorError, |
9c250931 | 18 | OnDemandPagedList, |
7f4173ae | 19 | parse_count, |
b80505a4 | 20 | str_to_int, |
80cbb6dd PH |
21 | ) |
22 | ||
23 | ||
class MixcloudIE(InfoExtractor):
    """Extract a single Mixcloud cloudcast from its ``/<uploader>/<cloudcast>`` page."""

    # The second path segment must not be one of the reserved listing names
    # (stream, uploads, favorites, listens, playlists); those URLs are matched
    # by the other Mixcloud extractors in this file.  The lookahead rejects a
    # reserved word only when it is the *whole* segment (followed by '/', '?',
    # '#' or the end of the URL), so a cloudcast slug that merely starts with
    # one of them (e.g. "uploads-2015") is still accepted.
    _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!(?:stream|uploads|favorites|listens|playlists)(?:[/?#]|$))([^/]+)'
    IE_NAME = 'mixcloud'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
        'info_dict': {
            'id': 'dholbach-cryptkeeper',
            'ext': 'm4a',
            'title': 'Cryptkeeper',
            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            'uploader': 'Daniel Holbach',
            'uploader_id': 'dholbach',
            'thumbnail': r're:https?://.*\.jpg',
            'view_count': int,
            'like_count': int,
        },
    }, {
        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
        'info_dict': {
            'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
            'ext': 'mp3',
            'title': 'Caribou 7 inch Vinyl Mix & Chat',
            'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
            'uploader': 'Gilles Peterson Worldwide',
            'uploader_id': 'gillespeterson',
            'thumbnail': 're:https?://.*',
            'view_count': int,
            'like_count': int,
        },
    }, {
        'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
        'only_matching': True,
    }]

    # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
    @staticmethod
    def _decrypt_play_info(play_info):
        """Decode the ``m-play-info`` attribute value into a text string.

        The value is base64-decoded and every resulting byte is XORed with
        the corresponding character of KEY, which is repeated cyclically.
        """
        KEY = 'pleasedontdownloadourmusictheartistswontgetpaid'

        play_info = base64.b64decode(play_info.encode('ascii'))

        return ''.join([
            compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)]))
            for idx, ch in enumerate(play_info)])

    def _real_extract(self, url):
        """Download the cloudcast page and build its info dict.

        Raises ExtractorError (expected) carrying the notice scraped from the
        page when such a notice is present and no stream URL can be obtained,
        either because the play-info attribute is missing or because the
        decoded play info has no 'stream_url' key.
        """
        mobj = re.match(self._VALID_URL, url)
        uploader = mobj.group(1)
        cloudcast_name = mobj.group(2)
        track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name)))

        webpage = self._download_webpage(url, track_id)

        message = self._html_search_regex(
            r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
            webpage, 'error message', default=None)

        play_info_re = r'm-play-info="([^"]+)"'
        encrypted_play_info = self._search_regex(
            play_info_re, webpage, 'play info', default=None)
        if encrypted_play_info is None:
            # Prefer the notice shown on the page over a generic
            # "unable to extract" failure when the attribute is absent.
            if message:
                raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
            # No notice to report: repeat the search without a default so the
            # usual extraction error is raised, exactly as before.
            encrypted_play_info = self._search_regex(
                play_info_re, webpage, 'play info')
        play_info = self._parse_json(
            self._decrypt_play_info(encrypted_play_info), track_id)

        if message and 'stream_url' not in play_info:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)

        song_url = play_info['stream_url']

        # Common prefix for the m-* attributes that sit on the same tag as
        # the m-play-on-spacebar marker.
        PREFIX = (
            r'm-play-on-spacebar[^>]+'
            r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
        title = self._html_search_regex(
            PREFIX + r'm-title="([^"]+)"', webpage, 'title')
        thumbnail = self._proto_relative_url(self._html_search_regex(
            PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
            fatal=False))
        # From here on 'uploader' is the display name scraped from the page,
        # no longer the URL path segment used to build track_id.
        uploader = self._html_search_regex(
            PREFIX + r'm-owner-name="([^"]+)"',
            webpage, 'uploader', fatal=False)
        uploader_id = self._search_regex(
            r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
        description = self._og_search_description(webpage)
        like_count = parse_count(self._search_regex(
            r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
            webpage, 'like count', default=None))
        view_count = str_to_int(self._search_regex(
            [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
             r'/listeners/?">([0-9,.]+)</a>'],
            webpage, 'play count', default=None))

        return {
            'id': track_id,
            'title': title,
            'url': song_url,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'like_count': like_count,
        }
c96eca42 PH |
125 | |
126 | ||
9c250931 YCH |
class MixcloudPlaylistBaseIE(InfoExtractor):
    """Shared helpers for the Mixcloud extractors that return playlists."""

    # Page size handed to OnDemandPagedList by the subclasses.
    _PAGE_SIZE = 24

    def _find_urls_in_page(self, page):
        """Yield one url_result (bound to MixcloudIE) per play button found in page."""
        play_button_re = r'm-play-button m-url="(?P<url>[^"]+)"'
        for match in re.finditer(play_button_re, page):
            absolute_url = compat_urlparse.urljoin(
                'https://www.mixcloud.com', clean_html(match.group('url')))
            yield self.url_result(absolute_url, MixcloudIE.ie_key())

    def _fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None):
        """Download one AJAX listing page for path and return its content.

        current_page is zero-based and only drives the progress note;
        real_page_number, when truthy, is the page actually requested,
        otherwise current_page + 1 is requested.
        """
        shown_page = current_page + 1
        if not real_page_number:
            real_page_number = shown_page

        listing_url = 'https://www.mixcloud.com/%s/' % path
        request_query = {'page': real_page_number, 'list': 'main', '_ajax': '1'}
        request_headers = {'X-Requested-With': 'XMLHttpRequest'}

        return self._download_webpage(
            listing_url, video_id,
            note='Download %s (page %d)' % (page_name, shown_page),
            errnote='Unable to download %s' % page_name,
            query=request_query,
            headers=request_headers)

    def _tracks_page_func(self, page, video_id, page_name, current_page):
        """Generator over the entries of a single listing page.

        Nothing is downloaded until the generator is first advanced.
        """
        listing = self._fetch_tracks_page(page, video_id, page_name, current_page)
        for entry in self._find_urls_in_page(listing):
            yield entry

    def _get_user_description(self, page_content):
        """Return the text of the description-text <div> (non-fatal lookup)."""
        description_re = r'<div[^>]+class="description-text"[^>]*>(.+?)</div>'
        return self._html_search_regex(
            description_re, page_content, 'user description', fatal=False)
155 | ||
156 | ||
class MixcloudUserIE(MixcloudPlaylistBaseIE):
    """Playlist extractor for a user's uploads, favorites or listens listing."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
    IE_NAME = 'mixcloud:user'

    _TESTS = [
        {
            'url': 'http://www.mixcloud.com/dholbach/',
            'info_dict': {
                'id': 'dholbach_uploads',
                'title': 'Daniel Holbach (uploads)',
                'description': 'md5:327af72d1efeb404a8216c27240d1370',
            },
            'playlist_mincount': 11,
        },
        {
            'url': 'http://www.mixcloud.com/dholbach/uploads/',
            'info_dict': {
                'id': 'dholbach_uploads',
                'title': 'Daniel Holbach (uploads)',
                'description': 'md5:327af72d1efeb404a8216c27240d1370',
            },
            'playlist_mincount': 11,
        },
        {
            'url': 'http://www.mixcloud.com/dholbach/favorites/',
            'info_dict': {
                'id': 'dholbach_favorites',
                'title': 'Daniel Holbach (favorites)',
                'description': 'md5:327af72d1efeb404a8216c27240d1370',
            },
            'params': {
                'playlist_items': '1-100',
            },
            'playlist_mincount': 100,
        },
        {
            'url': 'http://www.mixcloud.com/dholbach/listens/',
            'info_dict': {
                'id': 'dholbach_listens',
                'title': 'Daniel Holbach (listens)',
                'description': 'md5:327af72d1efeb404a8216c27240d1370',
            },
            'params': {
                'playlist_items': '1-100',
            },
            'playlist_mincount': 100,
        },
    ]

    def _real_extract(self, url):
        """Return a lazily paged playlist for the listing named in url."""
        user_id, list_type = re.match(self._VALID_URL, url).group('user', 'type')

        # A bare profile URL carries no listing name; treat it as the uploads list.
        list_type = list_type or 'uploads'

        video_id = '%s_%s' % (user_id, list_type)

        profile_url = 'https://www.mixcloud.com/%s/' % user_id
        profile = self._download_webpage(
            profile_url, video_id,
            note='Downloading user profile',
            errnote='Unable to download user profile')

        username = self._og_search_title(profile)
        description = self._get_user_description(profile)

        # Bind everything except the page index; OnDemandPagedList supplies that.
        fetch_page = functools.partial(
            self._tracks_page_func,
            '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type)
        entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE, use_cache=True)

        playlist_title = '%s (%s)' % (username, list_type)
        return self.playlist_result(entries, video_id, playlist_title, description)
c96eca42 | 228 | |
c96eca42 | 229 | |
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
    """Playlist extractor for ``/<user>/playlists/<playlist>`` pages."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
    IE_NAME = 'mixcloud:playlist'

    _TESTS = [
        {
            'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/',
            'info_dict': {
                'id': 'RedBullThre3style_tokyo-finalists-2015',
                'title': 'National Champions 2015',
                'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3',
            },
            'playlist_mincount': 16,
        },
        {
            'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
            'info_dict': {
                'id': 'maxvibes_jazzcat-on-ness-radio',
                'title': 'Jazzcat on Ness Radio',
                'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
            },
            'playlist_mincount': 23,
        },
    ]

    def _real_extract(self, url):
        """Return a lazily paged playlist for the playlist page at url."""
        user_id, playlist_id = re.match(self._VALID_URL, url).group('user', 'playlist')
        video_id = '%s_%s' % (user_id, playlist_id)

        # The playlist page itself is fetched under the user id.
        profile = self._download_webpage(
            url, user_id,
            note='Downloading playlist page',
            errnote='Unable to download playlist page')

        description = self._get_user_description(profile)
        title_re = r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>'
        playlist_title = self._html_search_regex(title_re, profile, 'playlist title')

        # Bind everything except the page index; OnDemandPagedList supplies that.
        fetch_page = functools.partial(
            self._tracks_page_func,
            '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist')
        entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)

        return self.playlist_result(entries, video_id, playlist_title, description)
e6da9240 YCH |
275 | |
276 | ||
class MixcloudStreamIE(MixcloudPlaylistBaseIE):
    """Playlist extractor for a user's ``/<user>/stream`` page."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
    IE_NAME = 'mixcloud:stream'

    _TEST = {
        'url': 'https://www.mixcloud.com/FirstEar/stream/',
        'info_dict': {
            'id': 'FirstEar',
            'title': 'First Ear',
            'description': 'Curators of good music\nfirstearmusic.com',
        },
        'playlist_mincount': 192,
    }

    def _real_extract(self, url):
        """Follow the stream's next-page links and return all entries as a playlist."""
        user_id = self._match_id(url)

        webpage = self._download_webpage(url, user_id)

        entries = []

        def _collect(page):
            # Record this page's entries and report where the next page lives
            # (None when the page advertises no further page).
            entries.extend(self._find_urls_in_page(page))
            return self._search_regex(
                r'm-next-page-url="([^"]+)"', page,
                'next page URL', default=None)

        next_page_url = _collect(webpage)
        prev_page_url = None
        fetched = 0  # zero-based count of AJAX pages requested so far

        # Stop when there is no next link or when a page links to the URL
        # that was just followed.
        while next_page_url and next_page_url != prev_page_url:
            prev_page_url = next_page_url
            page_number = int(self._search_regex(
                r'\?page=(\d+)', next_page_url, 'next page number'))

            ajax_page = self._fetch_tracks_page(
                '%s/stream' % user_id, user_id, 'stream', fetched,
                real_page_number=page_number)
            next_page_url = _collect(ajax_page)
            fetched += 1

        username = self._og_search_title(webpage)
        description = self._get_user_description(webpage)

        return self.playlist_result(entries, user_id, username, description)