]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/mixcloud.py
[compat] Fix `compat.WINDOWS_VT_MODE`
[yt-dlp.git] / yt_dlp / extractor / mixcloud.py
CommitLineData
e6da9240 1import itertools
80cbb6dd
PH
2
3from .common import InfoExtractor
c96eca42 4from ..compat import (
5d7d805c 5 compat_b64decode,
dd91dfcd
YCH
6 compat_chr,
7 compat_ord,
095774e5 8 compat_str,
c96eca42 9 compat_urllib_parse_unquote,
c96eca42 10)
1cc79574 11from ..utils import (
9040e2d6 12 ExtractorError,
095774e5 13 int_or_none,
5d92b407
RA
14 parse_iso8601,
15 strip_or_none,
095774e5 16 try_get,
095774e5 17)
80cbb6dd
PH
18
19
5d92b407
RA
class MixcloudBaseIE(InfoExtractor):
    # Shared GraphQL plumbing used by every Mixcloud extractor in this module.

    def _call_api(self, object_type, object_fields, display_id, username, slug=None):
        """Query Mixcloud's GraphQL endpoint for a single looked-up object.

        object_type   -- root type name; the query field is '<object_type>Lookup'
        object_fields -- GraphQL selection (field list) requested on that object
        display_id    -- identifier handed to the inherited _download_json
        username      -- owner of the object, interpolated into the lookup
        slug          -- optional object slug; left out of the lookup when falsy

        Returns whatever the response holds under ['data'][<lookup key>].
        """
        lookup_key = object_type + 'Lookup'
        # The slug clause is only part of the lookup when a slug was supplied.
        slug_clause = ', slug: "%s"' % slug if slug else ''
        graphql_query = '''{
  %s(lookup: {username: "%s"%s}) {
    %s
  }
}''' % (lookup_key, username, slug_clause, object_fields)
        response = self._download_json(
            'https://www.mixcloud.com/graphql', display_id,
            query={'query': graphql_query})
        return response['data'][lookup_key]
31
32
class MixcloudIE(MixcloudBaseIE):
    # Extractor for a single cloudcast (one track/mix page).
    # The negative lookahead keeps user-list and playlist URLs away from this
    # extractor; those path segments are handled by the classes further down.
    _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
    IE_NAME = 'mixcloud'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
        'info_dict': {
            'id': 'dholbach_cryptkeeper',
            'ext': 'm4a',
            'title': 'Cryptkeeper',
            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            'uploader': 'Daniel Holbach',
            'uploader_id': 'dholbach',
            'thumbnail': r're:https?://.*\.jpg',
            'view_count': int,
            'timestamp': 1321359578,
            'upload_date': '20111115',
        },
    }, {
        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
        'info_dict': {
            'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
            'ext': 'mp3',
            'title': 'Caribou 7 inch Vinyl Mix & Chat',
            'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
            'uploader': 'Gilles Peterson Worldwide',
            'uploader_id': 'gillespeterson',
            'thumbnail': 're:https?://.*',
            'view_count': int,
            'timestamp': 1422987057,
            'upload_date': '20150203',
        },
    }, {
        'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
        'only_matching': True,
    }]
    # Key that is XORed against the base64-decoded stream URLs (see _real_extract).
    _DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'

    @staticmethod
    def _decrypt_xor_cipher(key, ciphertext):
        """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR.

        The key is repeated (cycled) for as long as the ciphertext lasts and
        the result is returned as a text string.
        """
        return ''.join([
            compat_chr(compat_ord(ch) ^ compat_ord(k))
            for ch, k in zip(ciphertext, itertools.cycle(key))])

    def _real_extract(self, url):
        """Build the info dict for one cloudcast.

        Raises ExtractorError (expected=True) when the API returns no
        cloudcast or reports a 'restrictedReason'.
        """
        username, slug = self._match_valid_url(url).groups()
        username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
        track_id = '%s_%s' % (username, slug)

        cloudcast = self._call_api('cloudcast', '''audioLength
    comments(first: 100) {
      edges {
        node {
          comment
          created
          user {
            displayName
            username
          }
        }
      }
      totalCount
    }
    description
    favorites {
      totalCount
    }
    featuringArtistList
    isExclusive
    name
    owner {
      displayName
      url
      username
    }
    picture(width: 1024, height: 1024) {
        url
    }
    plays
    publishDate
    reposts {
      totalCount
    }
    streamInfo {
      dashUrl
      hlsUrl
      url
    }
    tags {
      tag {
        name
      }
    }
    restrictedReason
    id''', track_id, username, slug)

        if not cloudcast:
            raise ExtractorError('Track not found', expected=True)

        # Translate the API's restriction code into a user-facing message.
        reason = cloudcast.get('restrictedReason')
        if reason == 'tracklist':
            raise ExtractorError('Track unavailable in your country due to licensing restrictions', expected=True)
        elif reason == 'repeat_play':
            raise ExtractorError('You have reached your play limit for this track', expected=True)
        elif reason:
            raise ExtractorError('Track is restricted', expected=True)

        title = cloudcast['name']

        stream_info = cloudcast['streamInfo']
        formats = []

        for url_key in ('url', 'hlsUrl', 'dashUrl'):
            format_url = stream_info.get(url_key)
            if not format_url:
                continue
            # Stream URLs arrive base64-encoded and XORed with _DECRYPTION_KEY.
            decrypted = self._decrypt_xor_cipher(
                self._DECRYPTION_KEY, compat_b64decode(format_url))
            if url_key == 'hlsUrl':
                formats.extend(self._extract_m3u8_formats(
                    decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif url_key == 'dashUrl':
                formats.extend(self._extract_mpd_formats(
                    decrypted, track_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'format_id': 'http',
                    'url': decrypted,
                    'downloader_options': {
                        # Mixcloud starts throttling at >~5M
                        'http_chunk_size': 5242880,
                    },
                })

        # Exclusive uploads without any stream URL need an account.
        # NOTE(review): metadata_available=True presumably lets metadata-only
        # runs continue instead of aborting - confirm against InfoExtractor.
        if not formats and cloudcast.get('isExclusive'):
            self.raise_login_required(metadata_available=True)

        self._sort_formats(formats)

        comments = []
        for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
            node = edge.get('node') or {}
            text = strip_or_none(node.get('comment'))
            if not text:
                # Skip comments that are empty after stripping.
                continue
            user = node.get('user') or {}
            comments.append({
                'author': user.get('displayName'),
                'author_id': user.get('username'),
                'text': text,
                'timestamp': parse_iso8601(node.get('created')),
            })

        # Collect the tag names. Only entries that actually carry a string
        # name are kept, and a missing/null 'tags' field yields an empty list.
        tags = []
        for t in cloudcast.get('tags') or []:
            tag = try_get(t, lambda x: x['tag']['name'], compat_str)
            if tag:
                tags.append(tag)

        # Helper for the '<field> { totalCount }' sub-objects of the response.
        get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))

        owner = cloudcast.get('owner') or {}

        return {
            'id': track_id,
            'title': title,
            'formats': formats,
            'description': cloudcast.get('description'),
            'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str),
            'uploader': owner.get('displayName'),
            'timestamp': parse_iso8601(cloudcast.get('publishDate')),
            'uploader_id': owner.get('username'),
            'uploader_url': owner.get('url'),
            'duration': int_or_none(cloudcast.get('audioLength')),
            'view_count': int_or_none(cloudcast.get('plays')),
            'like_count': get_count('favorites'),
            'repost_count': get_count('reposts'),
            'comment_count': get_count('comments'),
            'comments': comments,
            'tags': tags,
            'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
        }
c96eca42
PH
217
218
5d92b407
RA
class MixcloudPlaylistBaseIE(MixcloudBaseIE):
    # Shared pagination logic for Mixcloud list pages.
    # _real_extract reads these class attributes, none of which are set here,
    # so concrete subclasses have to supply them:
    #   _ROOT_TYPE        GraphQL root type handed to _call_api
    #   _TITLE_KEY        response key holding the list title
    #   _DESCRIPTION_KEY  response key holding the list description
    #   _NODE_TEMPLATE    GraphQL selection requested for each edge node
    # The subclass _VALID_URL must expose exactly two groups: username and slug.

    def _get_cloudcast(self, node):
        """Return the cloudcast dict for an edge node (the node itself here)."""
        return node

    def _get_playlist_title(self, title, slug):
        """Return the playlist title; this base version ignores the slug."""
        return title

    def _real_extract(self, url):
        """Walk every page of the list and return a playlist result."""
        username, slug = self._match_valid_url(url).groups()
        username = compat_urllib_parse_unquote(username)
        if not slug:
            # No list segment in the URL: fall back to the uploads list.
            slug = 'uploads'
        else:
            slug = compat_urllib_parse_unquote(slug)
        playlist_id = '%s_%s' % (username, slug)

        is_playlist_type = self._ROOT_TYPE == 'playlist'
        # Playlists expose their entries under 'items'; for the other root
        # type the slug itself is used as the connection field name.
        playlist_type = 'items' if is_playlist_type else slug
        list_filter = ''

        has_next_page = True
        entries = []
        while has_next_page:
            playlist = self._call_api(
                self._ROOT_TYPE, '''%s
    %s
    %s(first: 100%s) {
      edges {
        node {
          %s
        }
      }
      pageInfo {
        endCursor
        hasNextPage
      }
    }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE),
                playlist_id, username, slug if is_playlist_type else None)

            items = playlist.get(playlist_type) or {}
            for edge in items.get('edges', []):
                cloudcast = self._get_cloudcast(edge.get('node') or {})
                cloudcast_url = cloudcast.get('url')
                if not cloudcast_url:
                    continue
                # Deliberately not named `slug`: the playlist-level slug is
                # still needed for later pages and for the final title.
                item_slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
                owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
                video_id = '%s_%s' % (owner_username, item_slug) if item_slug and owner_username else None
                entries.append(self.url_result(
                    cloudcast_url, MixcloudIE.ie_key(), video_id))

            # Continue after the last cursor while the API reports more pages.
            page_info = items['pageInfo']
            has_next_page = page_info['hasNextPage']
            list_filter = ', after: "%s"' % page_info['endCursor']

        return self.playlist_result(
            entries, playlist_id,
            self._get_playlist_title(playlist[self._TITLE_KEY], slug),
            playlist.get(self._DESCRIPTION_KEY))
9c250931
YCH
278
279
class MixcloudUserIE(MixcloudPlaylistBaseIE):
    # Extractor for a user's list pages (uploads, favorites, listens, stream).
    IE_NAME = 'mixcloud:user'
    # The list segment is optional in the URL pattern.
    _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$'

    # Settings read by the shared pagination code in the base class.
    _ROOT_TYPE = 'user'
    _TITLE_KEY = 'displayName'
    _DESCRIPTION_KEY = 'biog'
    _NODE_TEMPLATE = '''slug
          url
          owner { username }'''

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/uploads/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/favorites/',
        'info_dict': {
            'id': 'dholbach_favorites',
            'title': 'Daniel Holbach (favorites)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        # 'params': {
        #     'playlist_items': '1-100',
        # },
        'playlist_mincount': 396,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/listens/',
        'info_dict': {
            'id': 'dholbach_listens',
            'title': 'Daniel Holbach (listens)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        # 'params': {
        #     'playlist_items': '1-100',
        # },
        'playlist_mincount': 1623,
        'skip': 'Large list',
    }, {
        'url': 'https://www.mixcloud.com/FirstEar/stream/',
        'info_dict': {
            'id': 'FirstEar_stream',
            'title': 'First Ear (stream)',
            'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
        },
        'playlist_mincount': 271,
    }]

    def _get_playlist_title(self, title, slug):
        """Return the title with the list slug appended in parentheses."""
        return '{} ({})'.format(title, slug)
c96eca42 342
c96eca42 343
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
    # Extractor for a named playlist under a user's /playlists/ path.
    IE_NAME = 'mixcloud:playlist'
    _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'

    # Settings read by the shared pagination code in the base class.
    _ROOT_TYPE = 'playlist'
    _TITLE_KEY = 'name'
    _DESCRIPTION_KEY = 'description'
    _NODE_TEMPLATE = '''cloudcast {
            slug
            url
            owner { username }
          }'''

    _TESTS = [{
        'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
        'info_dict': {
            'id': 'maxvibes_jazzcat-on-ness-radio',
            'title': 'Ness Radio sessions',
        },
        'playlist_mincount': 59,
    }]

    def _get_cloudcast(self, node):
        """Unwrap the 'cloudcast' object of a playlist node ({} when absent/falsy)."""
        cloudcast = node.get('cloudcast')
        return cloudcast if cloudcast else {}