]>
Commit | Line | Data |
---|---|---|
d0390a0c PH |
1 | from __future__ import unicode_literals |
2 | ||
e6da9240 | 3 | import itertools |
80cbb6dd PH |
4 | |
5 | from .common import InfoExtractor | |
c96eca42 | 6 | from ..compat import ( |
5d7d805c | 7 | compat_b64decode, |
dd91dfcd YCH |
8 | compat_chr, |
9 | compat_ord, | |
095774e5 | 10 | compat_str, |
c96eca42 | 11 | compat_urllib_parse_unquote, |
2384f5a6 | 12 | compat_zip |
c96eca42 | 13 | ) |
1cc79574 | 14 | from ..utils import ( |
095774e5 | 15 | int_or_none, |
5d92b407 RA |
16 | parse_iso8601, |
17 | strip_or_none, | |
095774e5 | 18 | try_get, |
095774e5 | 19 | ) |
80cbb6dd PH |
20 | |
21 | ||
5d92b407 RA |
class MixcloudBaseIE(InfoExtractor):
    """Common base of the Mixcloud extractors; wraps the GraphQL endpoint."""

    def _call_api(self, object_type, object_fields, display_id, username, slug=None):
        """Run a ``<object_type>Lookup`` GraphQL query and return its payload.

        object_type   -- prefix of the lookup field ('Lookup' is appended to it)
        object_fields -- GraphQL selection inserted inside the lookup field
        display_id    -- id handed to _download_json
        username      -- value of the lookup's ``username`` argument
        slug          -- optional; when truthy it is sent as the ``slug`` argument

        Returns the value stored under ['data'][<object_type>Lookup] in the
        JSON response.
        """
        lookup_key = object_type + 'Lookup'
        # The slug argument is only part of the lookup when a slug was given.
        slug_filter = ', slug: "%s"' % slug if slug else ''
        graphql_query = '''{
%s(lookup: {username: "%s"%s}) {
%s
}
}''' % (lookup_key, username, slug_filter, object_fields)
        response = self._download_json(
            'https://www.mixcloud.com/graphql', display_id,
            query={'query': graphql_query})
        return response['data'][lookup_key]
33 | ||
34 | ||
class MixcloudIE(MixcloudBaseIE):
    """Extractor for a single Mixcloud cloudcast (``/<username>/<slug>/``).

    The URL pattern excludes the second-level paths ``stream``, ``uploads``,
    ``favorites``, ``listens`` and ``playlists``.
    """
    _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
    IE_NAME = 'mixcloud'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
        'info_dict': {
            'id': 'dholbach_cryptkeeper',
            'ext': 'm4a',
            'title': 'Cryptkeeper',
            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            'uploader': 'Daniel Holbach',
            'uploader_id': 'dholbach',
            'thumbnail': r're:https?://.*\.jpg',
            'view_count': int,
            'timestamp': 1321359578,
            'upload_date': '20111115',
        },
    }, {
        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
        'info_dict': {
            'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
            'ext': 'mp3',
            'title': 'Caribou 7 inch Vinyl Mix & Chat',
            'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
            'uploader': 'Gilles Peterson Worldwide',
            'uploader_id': 'gillespeterson',
            'thumbnail': 're:https?://.*',
            'view_count': int,
            'timestamp': 1422987057,
            'upload_date': '20150203',
        },
    }, {
        'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
        'only_matching': True,
    }]
    # Key XOR-ed (cyclically) with the base64-decoded stream URLs.
    _DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'

    @staticmethod
    def _decrypt_xor_cipher(key, ciphertext):
        """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
        # The key is repeated for as long as the ciphertext lasts.
        return ''.join([
            compat_chr(compat_ord(ch) ^ compat_ord(k))
            for ch, k in compat_zip(ciphertext, itertools.cycle(key))])

    def _real_extract(self, url):
        """Build the info dict for the cloudcast addressed by ``url``.

        The id is '<username>_<slug>' (both URL-unquoted).  Stream URLs from
        ``streamInfo`` are base64-decoded and XOR-decrypted with
        _DECRYPTION_KEY; ``hlsUrl`` and ``dashUrl`` are expanded into their
        formats, ``url`` becomes a plain 'http' format.
        """
        username, slug = self._match_valid_url(url).groups()
        username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
        track_id = '%s_%s' % (username, slug)

        cloudcast = self._call_api('cloudcast', '''audioLength
comments(first: 100) {
edges {
node {
comment
created
user {
displayName
username
}
}
}
totalCount
}
description
favorites {
totalCount
}
featuringArtistList
isExclusive
name
owner {
displayName
url
username
}
picture(width: 1024, height: 1024) {
url
}
plays
publishDate
reposts {
totalCount
}
streamInfo {
dashUrl
hlsUrl
url
}
tags {
tag {
name
}
}''', track_id, username, slug)

        title = cloudcast['name']

        # A null/missing streamInfo must not crash here: with no formats the
        # isExclusive check below gets the chance to report login is required.
        stream_info = cloudcast.get('streamInfo') or {}
        formats = []

        for url_key in ('url', 'hlsUrl', 'dashUrl'):
            format_url = stream_info.get(url_key)
            if not format_url:
                continue
            decrypted = self._decrypt_xor_cipher(
                self._DECRYPTION_KEY, compat_b64decode(format_url))
            if url_key == 'hlsUrl':
                formats.extend(self._extract_m3u8_formats(
                    decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif url_key == 'dashUrl':
                formats.extend(self._extract_mpd_formats(
                    decrypted, track_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'format_id': 'http',
                    'url': decrypted,
                    'downloader_options': {
                        # Mixcloud starts throttling at >~5M
                        'http_chunk_size': 5242880,
                    },
                })

        if not formats and cloudcast.get('isExclusive'):
            self.raise_login_required(metadata_available=True)

        self._sort_formats(formats)

        comments = []
        for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
            node = edge.get('node') or {}
            text = strip_or_none(node.get('comment'))
            if not text:
                # Skip comments without any text
                continue
            user = node.get('user') or {}
            comments.append({
                'author': user.get('displayName'),
                'author_id': user.get('username'),
                'text': text,
                'timestamp': parse_iso8601(node.get('created')),
            })

        tags = []
        # 'tags' may be null/missing; treat that as "no tags".
        for t in cloudcast.get('tags') or []:
            tag = try_get(t, lambda x: x['tag']['name'], compat_str)
            # Keep only entries that actually carry a tag name (the previous
            # condition was inverted and collected only the empty ones).
            if tag:
                tags.append(tag)

        # Reads cloudcast[<field>]['totalCount'] as an int, or None if absent
        get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))

        owner = cloudcast.get('owner') or {}

        return {
            'id': track_id,
            'title': title,
            'formats': formats,
            'description': cloudcast.get('description'),
            'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str),
            'uploader': owner.get('displayName'),
            'timestamp': parse_iso8601(cloudcast.get('publishDate')),
            'uploader_id': owner.get('username'),
            'uploader_url': owner.get('url'),
            'duration': int_or_none(cloudcast.get('audioLength')),
            'view_count': int_or_none(cloudcast.get('plays')),
            'like_count': get_count('favorites'),
            'repost_count': get_count('reposts'),
            'comment_count': get_count('comments'),
            'comments': comments,
            'tags': tags,
            'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
        }
c96eca42 PH |
206 | |
207 | ||
5d92b407 RA |
class MixcloudPlaylistBaseIE(MixcloudBaseIE):
    """Base for the paginated Mixcloud list extractors.

    _real_extract reads the following attributes, which this class does not
    define itself:
      _VALID_URL       -- must capture two groups: the username and the
                          (possibly empty) list slug
      _ROOT_TYPE       -- lookup type passed to _call_api; the value
                          'playlist' switches to the 'items' connection and
                          sends the slug as lookup argument
      _TITLE_KEY       -- field holding the list title
      _DESCRIPTION_KEY -- field holding the list description
      _NODE_TEMPLATE   -- GraphQL selection requested for every node
    """

    def _get_cloudcast(self, node):
        """Return the cloudcast dict contained in an edge ``node``."""
        return node

    def _get_playlist_title(self, title, slug):
        """Return the playlist title built from the API title and the list slug."""
        return title

    def _real_extract(self, url):
        """Collect every cloudcast of the list at ``url`` into a playlist result.

        Pages of 100 items are requested until the API reports no next page.
        """
        username, slug = self._match_valid_url(url).groups()
        username = compat_urllib_parse_unquote(username)
        if not slug:
            slug = 'uploads'
        else:
            slug = compat_urllib_parse_unquote(slug)
        playlist_id = '%s_%s' % (username, slug)

        is_playlist_type = self._ROOT_TYPE == 'playlist'
        playlist_type = 'items' if is_playlist_type else slug
        list_filter = ''

        has_next_page = True
        entries = []
        while has_next_page:
            playlist = self._call_api(
                self._ROOT_TYPE, '''%s
%s
%s(first: 100%s) {
edges {
node {
%s
}
}
pageInfo {
endCursor
hasNextPage
}
}''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE),
                playlist_id, username, slug if is_playlist_type else None)

            items = playlist.get(playlist_type) or {}
            for edge in items.get('edges') or []:
                cloudcast = self._get_cloudcast(edge.get('node') or {})
                cloudcast_url = cloudcast.get('url')
                if not cloudcast_url:
                    continue
                # Use a dedicated name: ``slug`` is the playlist slug and is
                # still needed for the next page request and for the title.
                item_slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
                owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
                video_id = '%s_%s' % (owner_username, item_slug) if item_slug and owner_username else None
                entries.append(self.url_result(
                    cloudcast_url, MixcloudIE.ie_key(), video_id))

            # ``items`` may be empty (see the ``or {}`` above); end the
            # pagination instead of failing on the missing pageInfo, and never
            # continue without a cursor to continue from.
            page_info = items.get('pageInfo') or {}
            end_cursor = page_info.get('endCursor')
            has_next_page = bool(page_info.get('hasNextPage')) and end_cursor is not None
            list_filter = ', after: "%s"' % end_cursor

        return self.playlist_result(
            entries, playlist_id,
            self._get_playlist_title(playlist[self._TITLE_KEY], slug),
            playlist.get(self._DESCRIPTION_KEY))
9c250931 YCH |
267 | |
268 | ||
class MixcloudUserIE(MixcloudPlaylistBaseIE):
    """Extractor for the lists of a Mixcloud user.

    Matches ``/<user>/`` optionally followed by ``uploads``, ``favorites``,
    ``listens`` or ``stream``.
    """
    IE_NAME = 'mixcloud:user'
    _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$'

    # Lookup settings read by MixcloudPlaylistBaseIE._real_extract
    _ROOT_TYPE = 'user'
    _TITLE_KEY = 'displayName'
    _DESCRIPTION_KEY = 'biog'
    _NODE_TEMPLATE = '''slug
url
owner { username }'''

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/uploads/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/favorites/',
        'info_dict': {
            'id': 'dholbach_favorites',
            'title': 'Daniel Holbach (favorites)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        # 'params': {
        #     'playlist_items': '1-100',
        # },
        'playlist_mincount': 396,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/listens/',
        'info_dict': {
            'id': 'dholbach_listens',
            'title': 'Daniel Holbach (listens)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        # 'params': {
        #     'playlist_items': '1-100',
        # },
        'playlist_mincount': 1623,
        'skip': 'Large list',
    }, {
        'url': 'https://www.mixcloud.com/FirstEar/stream/',
        'info_dict': {
            'id': 'FirstEar_stream',
            'title': 'First Ear (stream)',
            'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
        },
        'playlist_mincount': 271,
    }]

    def _get_playlist_title(self, title, slug):
        """Return the title followed by the list slug in parentheses."""
        title_parts = (title, slug)
        return '%s (%s)' % title_parts
c96eca42 | 331 | |
c96eca42 | 332 | |
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
    """Extractor for a Mixcloud playlist (``/<user>/playlists/<playlist>/``)."""
    IE_NAME = 'mixcloud:playlist'
    _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'

    # Lookup settings read by MixcloudPlaylistBaseIE._real_extract
    _ROOT_TYPE = 'playlist'
    _TITLE_KEY = 'name'
    _DESCRIPTION_KEY = 'description'
    _NODE_TEMPLATE = '''cloudcast {
slug
url
owner { username }
}'''

    _TESTS = [{
        'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
        'info_dict': {
            'id': 'maxvibes_jazzcat-on-ness-radio',
            'title': 'Ness Radio sessions',
        },
        'playlist_mincount': 59,
    }]

    def _get_cloudcast(self, node):
        """Return the dict under the node's 'cloudcast' key, or {} when it is empty/absent."""
        cloudcast = node.get('cloudcast')
        if cloudcast:
            return cloudcast
        return {}