]>
Commit | Line | Data |
---|---|---|
1 | import itertools | |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..compat import ( | |
5 | compat_b64decode, | |
6 | compat_ord, | |
7 | compat_str, | |
8 | compat_urllib_parse_unquote, | |
9 | ) | |
10 | from ..utils import ( | |
11 | ExtractorError, | |
12 | int_or_none, | |
13 | parse_iso8601, | |
14 | strip_or_none, | |
15 | try_get, | |
16 | ) | |
17 | ||
18 | ||
class MixcloudBaseIE(InfoExtractor):
    """Common base for the Mixcloud extractors; wraps the site's GraphQL endpoint."""

    def _call_api(self, object_type, object_fields, display_id, username, slug=None):
        """Run a ``<object_type>Lookup`` GraphQL query and return its payload.

        Args:
            object_type: Root object name; ``'Lookup'`` is appended to build
                the query field (e.g. ``'cloudcast'`` -> ``cloudcastLookup``).
            object_fields: Body of the GraphQL selection set, inserted verbatim.
            display_id: Identifier forwarded to ``_download_json``.
            username: Username placed in the ``lookup`` argument.
            slug: Optional slug; left out of the ``lookup`` argument when falsy.

        Returns:
            The value stored under ``['data'][<object_type>Lookup]`` in the
            JSON response.
        """
        # Function-scope import so this block is self-contained.
        import json

        def graphql_string(value):
            # Username and slug come from the (URL-decoded) page URL, so they
            # may contain quotes, backslashes or control characters. Emit them
            # as a quoted, escaped string literal instead of pasting them raw
            # between double quotes.
            return json.dumps(value, ensure_ascii=False)

        lookup_key = object_type + 'Lookup'
        lookup_args = 'username: %s' % graphql_string(username)
        if slug:
            lookup_args += ', slug: %s' % graphql_string(slug)

        query = '''{
  %s(lookup: {%s}) {
    %s
  }
}''' % (lookup_key, lookup_args, object_fields)

        return self._download_json(
            'https://app.mixcloud.com/graphql', display_id,
            query={'query': query})['data'][lookup_key]
30 | ||
31 | ||
class MixcloudIE(MixcloudBaseIE):
    """Extractor for a single Mixcloud track (``cloudcast``) page."""

    _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
    IE_NAME = 'mixcloud'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
        'info_dict': {
            'id': 'dholbach_cryptkeeper',
            'ext': 'm4a',
            'title': 'Cryptkeeper',
            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            'uploader': 'Daniel Holbach',
            'uploader_id': 'dholbach',
            'thumbnail': r're:https?://.*\.jpg',
            'view_count': int,
            'timestamp': 1321359578,
            'upload_date': '20111115',
            'uploader_url': 'https://www.mixcloud.com/dholbach/',
            'artist': 'Submorphics & Chino , Telekinesis, Porter Robinson, Enei, Breakage ft Jess Mills',
            'duration': 3723,
            # Tag names are now actually collected; only the type is checked.
            'tags': list,
            'comment_count': int,
            'repost_count': int,
            'like_count': int,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
        'info_dict': {
            'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
            'ext': 'mp3',
            'title': 'Caribou 7 inch Vinyl Mix & Chat',
            'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
            'uploader': 'Gilles Peterson Worldwide',
            'uploader_id': 'gillespeterson',
            'thumbnail': 're:https?://.*',
            'view_count': int,
            'timestamp': 1422987057,
            'upload_date': '20150203',
            'uploader_url': 'https://www.mixcloud.com/gillespeterson/',
            'duration': 2992,
            # Tag names are now actually collected; only the type is checked.
            'tags': list,
            'comment_count': int,
            'repost_count': int,
            'like_count': int,
        },
        'params': {'skip_download': '404 playback error on site'},
    }, {
        'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
        'only_matching': True,
    }]
    # Key XOR-ed against the base64-decoded stream URLs (see _real_extract).
    _DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'

    @staticmethod
    def _decrypt_xor_cipher(key, ciphertext):
        """Encrypt/decrypt with a repeating-key XOR cipher.

        XOR is its own inverse, so the same call works in both directions.
        The key is cycled to cover the full length of ``ciphertext``; the
        result is returned as a ``str`` built from the XOR-ed code points.
        """
        return ''.join([
            chr(compat_ord(ch) ^ compat_ord(k))
            for ch, k in zip(ciphertext, itertools.cycle(key))])

    def _real_extract(self, url):
        """Build the info dict for the track addressed by ``url``.

        Raises:
            ExtractorError: when the API returns no track, or the track
                carries a ``restrictedReason``.
        """
        username, slug = self._match_valid_url(url).groups()
        username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
        track_id = '%s_%s' % (username, slug)

        cloudcast = self._call_api('cloudcast', '''audioLength
    comments(first: 100) {
      edges {
        node {
          comment
          created
          user {
            displayName
            username
          }
        }
      }
      totalCount
    }
    description
    favorites {
      totalCount
    }
    featuringArtistList
    isExclusive
    name
    owner {
      displayName
      url
      username
    }
    picture(width: 1024, height: 1024) {
      url
    }
    plays
    publishDate
    reposts {
      totalCount
    }
    streamInfo {
      dashUrl
      hlsUrl
      url
    }
    tags {
      tag {
        name
      }
    }
    restrictedReason
    id''', track_id, username, slug)

        if not cloudcast:
            raise ExtractorError('Track not found', expected=True)

        reason = cloudcast.get('restrictedReason')
        if reason == 'tracklist':
            raise ExtractorError('Track unavailable in your country due to licensing restrictions', expected=True)
        elif reason == 'repeat_play':
            raise ExtractorError('You have reached your play limit for this track', expected=True)
        elif reason:
            raise ExtractorError('Track is restricted', expected=True)

        title = cloudcast['name']

        # A null ``streamInfo`` must not crash here, otherwise the
        # exclusive-content check below could never be reached.
        stream_info = cloudcast.get('streamInfo') or {}
        formats = []

        for url_key in ('url', 'hlsUrl', 'dashUrl'):
            format_url = stream_info.get(url_key)
            if not format_url:
                continue
            # Stream URLs arrive base64-encoded and XOR-ed with _DECRYPTION_KEY.
            decrypted = self._decrypt_xor_cipher(
                self._DECRYPTION_KEY, compat_b64decode(format_url))
            if url_key == 'hlsUrl':
                formats.extend(self._extract_m3u8_formats(
                    decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif url_key == 'dashUrl':
                formats.extend(self._extract_mpd_formats(
                    decrypted, track_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'format_id': 'http',
                    'url': decrypted,
                    'vcodec': 'none',
                    'downloader_options': {
                        # Mixcloud starts throttling at >~5M
                        'http_chunk_size': 5242880,
                    },
                })

        if not formats and cloudcast.get('isExclusive'):
            self.raise_login_required(metadata_available=True)

        comments = []
        for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
            node = edge.get('node') or {}
            text = strip_or_none(node.get('comment'))
            if not text:
                continue
            user = node.get('user') or {}
            comments.append({
                'author': user.get('displayName'),
                'author_id': user.get('username'),
                'text': text,
                'timestamp': parse_iso8601(node.get('created')),
            })

        # Keep only entries that carry a usable tag name. The previous check
        # was inverted (it kept exactly the missing names), and a null
        # ``tags`` value raised TypeError.
        tags = []
        for t in cloudcast.get('tags') or []:
            tag = try_get(t, lambda x: x['tag']['name'], compat_str)
            if tag:
                tags.append(tag)

        get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))

        owner = cloudcast.get('owner') or {}

        return {
            'id': track_id,
            'title': title,
            'formats': formats,
            'description': cloudcast.get('description'),
            'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str),
            'uploader': owner.get('displayName'),
            'timestamp': parse_iso8601(cloudcast.get('publishDate')),
            'uploader_id': owner.get('username'),
            'uploader_url': owner.get('url'),
            'duration': int_or_none(cloudcast.get('audioLength')),
            'view_count': int_or_none(cloudcast.get('plays')),
            'like_count': get_count('favorites'),
            'repost_count': get_count('reposts'),
            'comment_count': get_count('comments'),
            'comments': comments,
            'tags': tags,
            'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
        }
230 | ||
231 | ||
class MixcloudPlaylistBaseIE(MixcloudBaseIE):
    """Shared pagination logic for Mixcloud listings.

    ``_real_extract`` reads ``_ROOT_TYPE``, ``_TITLE_KEY``,
    ``_DESCRIPTION_KEY`` and ``_NODE_TEMPLATE`` from ``self``; none of them
    is defined here, so concrete subclasses have to provide them.
    """

    def _get_cloudcast(self, node):
        """Return the cloudcast dict for an edge node (the node itself by default)."""
        return node

    def _get_playlist_title(self, title, slug):
        """Return the playlist title (unchanged by default; ``slug`` is unused)."""
        return title

    def _real_extract(self, url):
        """Page through the listing addressed by ``url`` and return a playlist result.

        Raises:
            ExtractorError: when the API returns no object for the lookup.
        """
        username, slug = self._match_valid_url(url).groups()
        username = compat_urllib_parse_unquote(username)
        # No second URL component means the default "uploads" listing.
        slug = compat_urllib_parse_unquote(slug) if slug else 'uploads'
        playlist_id = '%s_%s' % (username, slug)

        is_playlist_type = self._ROOT_TYPE == 'playlist'
        # Playlists list their entries under ``items``; otherwise the slug
        # itself is used as the connection field name.
        playlist_type = 'items' if is_playlist_type else slug
        list_filter = ''

        has_next_page = True
        entries = []
        while has_next_page:
            playlist = self._call_api(
                self._ROOT_TYPE, '''%s
    %s
    %s(first: 100%s) {
      edges {
        node {
          %s
        }
      }
      pageInfo {
        endCursor
        hasNextPage
      }
    }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE),
                playlist_id, username, slug if is_playlist_type else None)

            # A null lookup result used to crash on ``playlist.get``.
            if not playlist:
                raise ExtractorError(
                    '%s not found' % self._ROOT_TYPE.capitalize(), expected=True)

            items = playlist.get(playlist_type) or {}
            for edge in items.get('edges') or []:
                cloudcast = self._get_cloudcast(edge.get('node') or {})
                cloudcast_url = cloudcast.get('url')
                if not cloudcast_url:
                    continue
                item_slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
                owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
                video_id = f'{owner_username}_{item_slug}' if item_slug and owner_username else None
                entries.append(self.url_result(
                    cloudcast_url, MixcloudIE.ie_key(), video_id))

            # Tolerate a missing/null ``pageInfo`` and stop paging when there
            # is no cursor to continue from (rather than sending
            # ``after: "None"`` on the next request).
            page_info = items.get('pageInfo') or {}
            end_cursor = page_info.get('endCursor')
            has_next_page = bool(page_info.get('hasNextPage') and end_cursor)
            if has_next_page:
                list_filter = ', after: "%s"' % end_cursor

        return self.playlist_result(
            entries, playlist_id,
            self._get_playlist_title(playlist[self._TITLE_KEY], slug),
            playlist.get(self._DESCRIPTION_KEY))
291 | ||
292 | ||
class MixcloudUserIE(MixcloudPlaylistBaseIE):
    """Extractor for a Mixcloud user's uploads, favorites, listens or stream listing."""

    _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$'
    IE_NAME = 'mixcloud:user'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/uploads/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/favorites/',
        'info_dict': {
            'id': 'dholbach_favorites',
            'title': 'Daniel Holbach (favorites)',
            'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
        },
        # 'params': {
        #     'playlist_items': '1-100',
        # },
        'playlist_mincount': 396,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/listens/',
        'info_dict': {
            'id': 'dholbach_listens',
            'title': 'Daniel Holbach (listens)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        # 'params': {
        #     'playlist_items': '1-100',
        # },
        'playlist_mincount': 1623,
        'skip': 'Large list',
    }, {
        'url': 'https://www.mixcloud.com/FirstEar/stream/',
        'info_dict': {
            'id': 'FirstEar_stream',
            'title': 'First Ear (stream)',
            'description': 'we maraud for ears',
        },
        'playlist_mincount': 269,
    }]

    # Settings read by MixcloudPlaylistBaseIE._real_extract.
    _ROOT_TYPE = 'user'
    _TITLE_KEY = 'displayName'
    _DESCRIPTION_KEY = 'biog'
    _NODE_TEMPLATE = '''slug
          url
          owner { username }'''

    def _get_playlist_title(self, title, slug):
        """Return the title followed by the listing slug in parentheses."""
        return f'{title} ({slug})'
355 | ||
356 | ||
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
    """Extractor for a named playlist under a Mixcloud user."""

    _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
    IE_NAME = 'mixcloud:playlist'

    _TESTS = [{
        'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
        'info_dict': {
            'id': 'maxvibes_jazzcat-on-ness-radio',
            'title': 'Ness Radio sessions',
        },
        'playlist_mincount': 59,
    }]

    # Settings read by MixcloudPlaylistBaseIE._real_extract.
    _ROOT_TYPE = 'playlist'
    _TITLE_KEY = 'name'
    _DESCRIPTION_KEY = 'description'
    _NODE_TEMPLATE = '''cloudcast {
            slug
            url
            owner { username }
          }'''

    def _get_cloudcast(self, node):
        """Return the ``cloudcast`` dict nested in a playlist node, or ``{}`` if absent/falsy."""
        nested = node.get('cloudcast')
        if nested:
            return nested
        return {}