jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/mixcloud.py
Allow extractors to specify section_start/end for clips
[yt-dlp.git] / yt_dlp / extractor / mixcloud.py
1 import itertools
2
3 from .common import InfoExtractor
4 from ..compat import (
5 compat_b64decode,
6 compat_chr,
7 compat_ord,
8 compat_str,
9 compat_urllib_parse_unquote,
10 )
11 from ..utils import (
12 ExtractorError,
13 int_or_none,
14 parse_iso8601,
15 strip_or_none,
16 try_get,
17 )
18
19
class MixcloudBaseIE(InfoExtractor):
    """Common base for the Mixcloud extractors: wraps the site's GraphQL endpoint."""

    def _call_api(self, object_type, object_fields, display_id, username, slug=None):
        """Run a ``<object_type>Lookup`` GraphQL query and return its payload.

        :param object_type: root object name; ``'Lookup'`` is appended to it
            to form the GraphQL field that is queried.
        :param object_fields: GraphQL selection set (field list) to request
            on the looked-up object, interpolated verbatim.
        :param display_id: identifier forwarded to ``_download_json``.
        :param username: value for the ``username`` lookup argument.
        :param slug: optional value for the ``slug`` lookup argument; the
            argument is left out of the query entirely when this is falsy.
        :return: ``response['data'][<object_type>Lookup]``.

        NOTE(review): ``username`` and ``slug`` are interpolated into the
        query text without escaping, so a value containing ``"`` would yield
        a malformed query.
        """
        lookup_key = object_type + 'Lookup'
        # Only constrain by slug when the caller supplied one.
        slug_filter = ', slug: "%s"' % slug if slug else ''
        graphql_query = '''{
  %s(lookup: {username: "%s"%s}) {
    %s
  }
}''' % (lookup_key, username, slug_filter, object_fields)

        response = self._download_json(
            'https://www.mixcloud.com/graphql', display_id,
            query={'query': graphql_query})
        return response['data'][lookup_key]
31
32
class MixcloudIE(MixcloudBaseIE):
    """Extractor for a single Mixcloud cloudcast (``mixcloud.com/<user>/<slug>``)."""

    # Two path segments; the negative lookahead rejects second segments that
    # begin with one of the listing names (stream, uploads, favorites, ...).
    _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
    IE_NAME = 'mixcloud'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
        'info_dict': {
            'id': 'dholbach_cryptkeeper',
            'ext': 'm4a',
            'title': 'Cryptkeeper',
            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            'uploader': 'Daniel Holbach',
            'uploader_id': 'dholbach',
            'thumbnail': r're:https?://.*\.jpg',
            'view_count': int,
            'timestamp': 1321359578,
            'upload_date': '20111115',
        },
    }, {
        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
        'info_dict': {
            'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
            'ext': 'mp3',
            'title': 'Caribou 7 inch Vinyl Mix & Chat',
            'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
            'uploader': 'Gilles Peterson Worldwide',
            'uploader_id': 'gillespeterson',
            'thumbnail': 're:https?://.*',
            'view_count': int,
            'timestamp': 1422987057,
            'upload_date': '20150203',
        },
    }, {
        'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
        'only_matching': True,
    }]
    # Key XOR-ed against the base64-decoded stream URLs returned by the API.
    _DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'

    @staticmethod
    def _decrypt_xor_cipher(key, ciphertext):
        """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR.

        Each element of ``ciphertext`` is XOR-ed with the corresponding
        character of ``key``, which is repeated for as long as needed.
        Returns the result as a string.
        """
        return ''.join([
            compat_chr(compat_ord(ch) ^ compat_ord(k))
            for ch, k in zip(ciphertext, itertools.cycle(key))])

    def _real_extract(self, url):
        """Query the GraphQL API for one cloudcast and build its info dict.

        Raises ``ExtractorError`` (``expected=True``) when the track is not
        found or when the API reports a ``restrictedReason``.
        """
        username, slug = self._match_valid_url(url).groups()
        username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
        track_id = '%s_%s' % (username, slug)

        cloudcast = self._call_api('cloudcast', '''audioLength
    comments(first: 100) {
      edges {
        node {
          comment
          created
          user {
            displayName
            username
          }
        }
      }
      totalCount
    }
    description
    favorites {
      totalCount
    }
    featuringArtistList
    isExclusive
    name
    owner {
      displayName
      url
      username
    }
    picture(width: 1024, height: 1024) {
      url
    }
    plays
    publishDate
    reposts {
      totalCount
    }
    streamInfo {
      dashUrl
      hlsUrl
      url
    }
    tags {
      tag {
        name
      }
    }
    restrictedReason
    id''', track_id, username, slug)

        if not cloudcast:
            raise ExtractorError('Track not found', expected=True)

        # Translate the API's restriction codes into user-facing errors.
        reason = cloudcast.get('restrictedReason')
        if reason == 'tracklist':
            raise ExtractorError('Track unavailable in your country due to licensing restrictions', expected=True)
        elif reason == 'repeat_play':
            raise ExtractorError('You have reached your play limit for this track', expected=True)
        elif reason:
            raise ExtractorError('Track is restricted', expected=True)

        title = cloudcast['name']

        stream_info = cloudcast['streamInfo']
        formats = []

        for url_key in ('url', 'hlsUrl', 'dashUrl'):
            format_url = stream_info.get(url_key)
            if not format_url:
                continue
            # Stream URLs arrive base64-encoded and XOR-obfuscated.
            decrypted = self._decrypt_xor_cipher(
                self._DECRYPTION_KEY, compat_b64decode(format_url))
            if url_key == 'hlsUrl':
                formats.extend(self._extract_m3u8_formats(
                    decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif url_key == 'dashUrl':
                formats.extend(self._extract_mpd_formats(
                    decrypted, track_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'format_id': 'http',
                    'url': decrypted,
                    'downloader_options': {
                        # Mixcloud starts throttling at >~5M
                        'http_chunk_size': 5242880,
                    },
                })

        if not formats and cloudcast.get('isExclusive'):
            self.raise_login_required(metadata_available=True)

        self._sort_formats(formats)

        comments = []
        for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
            node = edge.get('node') or {}
            text = strip_or_none(node.get('comment'))
            if not text:
                # Skip comments that have no text left after stripping.
                continue
            user = node.get('user') or {}
            comments.append({
                'author': user.get('displayName'),
                'author_id': user.get('username'),
                'text': text,
                'timestamp': parse_iso8601(node.get('created')),
            })

        # Keep only entries that yield a non-empty string tag name; tolerate
        # a missing or null ``tags`` field.
        tags = []
        for t in cloudcast.get('tags') or []:
            tag = try_get(t, lambda x: x['tag']['name'], compat_str)
            if tag:
                tags.append(tag)

        # Reads ``cloudcast[<field>]['totalCount']`` as an int, or None.
        get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))

        owner = cloudcast.get('owner') or {}

        return {
            'id': track_id,
            'title': title,
            'formats': formats,
            'description': cloudcast.get('description'),
            'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str),
            'uploader': owner.get('displayName'),
            'timestamp': parse_iso8601(cloudcast.get('publishDate')),
            'uploader_id': owner.get('username'),
            'uploader_url': owner.get('url'),
            'duration': int_or_none(cloudcast.get('audioLength')),
            'view_count': int_or_none(cloudcast.get('plays')),
            'like_count': get_count('favorites'),
            'repost_count': get_count('reposts'),
            'comment_count': get_count('comments'),
            'comments': comments,
            'tags': tags,
            'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
        }
217
218
class MixcloudPlaylistBaseIE(MixcloudBaseIE):
    """Base for paginated Mixcloud listings (user lists and playlists).

    ``_real_extract`` reads the following from the concrete class:
    ``_ROOT_TYPE``, ``_TITLE_KEY``, ``_DESCRIPTION_KEY`` and
    ``_NODE_TEMPLATE``, plus a ``_VALID_URL`` with exactly two groups
    (username, then list slug).
    """

    def _get_cloudcast(self, node):
        """Return the cloudcast dict for an edge node (the node itself here)."""
        return node

    def _get_playlist_title(self, title, slug):
        """Return the playlist title; this base version ignores ``slug``."""
        return title

    def _real_extract(self, url):
        """Page through the listing 100 items at a time and build a playlist result."""
        username, slug = self._match_valid_url(url).groups()
        username = compat_urllib_parse_unquote(username)
        if not slug:
            # No list name in the URL: fall back to 'uploads'.
            slug = 'uploads'
        else:
            slug = compat_urllib_parse_unquote(slug)
        playlist_id = '%s_%s' % (username, slug)

        is_playlist_type = self._ROOT_TYPE == 'playlist'
        # Playlist roots expose their entries as 'items'; for other roots
        # the list slug itself is used as the connection field name.
        playlist_type = 'items' if is_playlist_type else slug
        list_filter = ''

        has_next_page = True
        entries = []
        while has_next_page:
            playlist = self._call_api(
                self._ROOT_TYPE, '''%s
    %s
    %s(first: 100%s) {
      edges {
        node {
          %s
        }
      }
      pageInfo {
        endCursor
        hasNextPage
      }
    }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE),
                playlist_id, username, slug if is_playlist_type else None)

            items = playlist.get(playlist_type) or {}
            for edge in items.get('edges', []):
                cloudcast = self._get_cloudcast(edge.get('node') or {})
                cloudcast_url = cloudcast.get('url')
                if not cloudcast_url:
                    continue
                # Use a separate name for the entry's slug: the list-level
                # ``slug`` is still needed for the next page's API call and
                # for the final playlist title.
                item_slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
                owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
                video_id = '%s_%s' % (owner_username, item_slug) if item_slug and owner_username else None
                entries.append(self.url_result(
                    cloudcast_url, MixcloudIE.ie_key(), video_id))

            page_info = items['pageInfo']
            has_next_page = page_info['hasNextPage']
            # Continue after the last item of this page on the next request.
            list_filter = ', after: "%s"' % page_info['endCursor']

        return self.playlist_result(
            entries, playlist_id,
            self._get_playlist_title(playlist[self._TITLE_KEY], slug),
            playlist.get(self._DESCRIPTION_KEY))
278
279
class MixcloudUserIE(MixcloudPlaylistBaseIE):
    """Extractor for a user's listing pages: uploads, favorites, listens or stream."""

    IE_NAME = 'mixcloud:user'
    # The list-type segment is optional; both the segment and the trailing
    # slash may be absent.
    _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$'

    # GraphQL configuration read by the shared playlist extraction logic.
    _ROOT_TYPE = 'user'
    _TITLE_KEY = 'displayName'
    _DESCRIPTION_KEY = 'biog'
    _NODE_TEMPLATE = '''slug
          url
          owner { username }'''

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/uploads/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/favorites/',
        'info_dict': {
            'id': 'dholbach_favorites',
            'title': 'Daniel Holbach (favorites)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        # Optional limit for this test, currently disabled:
        # 'params': {
        #     'playlist_items': '1-100',
        # },
        'playlist_mincount': 396,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/listens/',
        'info_dict': {
            'id': 'dholbach_listens',
            'title': 'Daniel Holbach (listens)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        # Optional limit for this test, currently disabled:
        # 'params': {
        #     'playlist_items': '1-100',
        # },
        'playlist_mincount': 1623,
        'skip': 'Large list',
    }, {
        'url': 'https://www.mixcloud.com/FirstEar/stream/',
        'info_dict': {
            'id': 'FirstEar_stream',
            'title': 'First Ear (stream)',
            'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
        },
        'playlist_mincount': 271,
    }]

    def _get_playlist_title(self, title, slug):
        """Return ``title`` followed by ``slug`` in parentheses."""
        decorated_title = '%s (%s)' % (title, slug)
        return decorated_title
342
343
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
    """Extractor for ``mixcloud.com/<user>/playlists/<playlist>`` pages."""

    IE_NAME = 'mixcloud:playlist'
    _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'

    # GraphQL configuration read by the shared playlist extraction logic.
    _ROOT_TYPE = 'playlist'
    _TITLE_KEY = 'name'
    _DESCRIPTION_KEY = 'description'
    # Each playlist node nests the actual entry under 'cloudcast'.
    _NODE_TEMPLATE = '''cloudcast {
            slug
            url
            owner { username }
          }'''

    _TESTS = [{
        'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
        'info_dict': {
            'id': 'maxvibes_jazzcat-on-ness-radio',
            'title': 'Ness Radio sessions',
        },
        'playlist_mincount': 59,
    }]

    def _get_cloudcast(self, node):
        """Return the node's nested ``cloudcast`` dict, or ``{}`` when missing or falsy."""
        nested = node.get('cloudcast')
        if nested:
            return nested
        return {}