]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/audius.py
[LnkIE] Add extractor (#2408)
[yt-dlp.git] / yt_dlp / extractor / audius.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import random
5
6 from .common import InfoExtractor
7 from ..utils import ExtractorError, try_get, compat_str, str_or_none
8 from ..compat import compat_urllib_parse_unquote
9
10
class AudiusBaseIE(InfoExtractor):
    """Shared API plumbing for the Audius extractors.

    Chooses an API host, sends requests to it and unwraps the ``data``
    member of the JSON responses.
    """

    # Base URL of the API host in use; set by _select_api_base()
    _API_BASE = None
    # Version path component appended to _API_BASE for every API request
    _API_V = '/v1'

    def _get_response_data(self, response):
        """Return the ``data`` member of an API response.

        Raises ExtractorError carrying the API's own message when the
        response is a dict holding nothing but a ``message`` key, and a
        generic ExtractorError for any other response without ``data``.
        """
        if isinstance(response, dict):
            response_data = response.get('data')
            if response_data is not None:
                return response_data
            if len(response) == 1 and 'message' in response:
                raise ExtractorError('API error: %s' % response['message'],
                                     expected=True)
        raise ExtractorError('Unexpected API response')

    def _select_api_base(self):
        """Select one of the currently available API hosts at random.

        Stores the chosen host in ``self._API_BASE``. Raises ExtractorError
        when the host list is missing, not a list, or empty.
        """
        response = super(AudiusBaseIE, self)._download_json(
            'https://api.audius.co/', None,
            note='Requesting available API hosts',
            errnote='Unable to request available API hosts')
        hosts = self._get_response_data(response)
        # random.choice() raises IndexError on an empty sequence, so an empty
        # list is treated the same as a malformed response
        if isinstance(hosts, list) and hosts:
            self._API_BASE = random.choice(hosts)
            return
        raise ExtractorError('Unable to get available API hosts')

    @staticmethod
    def _prepare_url(url, title):
        """
        Audius removes forward slashes from the uri, but leaves backslashes.
        The problem is that the current version of Chrome replaces backslashes
        in the address bar with forward slashes, so if you copy the link from
        there and paste it into youtube-dl, you won't be able to download
        anything from this link, since the Audius API won't be able to resolve
        this url.

        Returns the percent-decoded url, with every forward slash inside the
        title part turned back into an encoded backslash (%5C).
        """
        url = compat_urllib_parse_unquote(url)
        title = compat_urllib_parse_unquote(title)
        if '/' in title or '%2F' in title:
            fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
            return url.replace(title, fixed_title)
        return url

    def _api_request(self, path, item_id=None, note='Downloading JSON metadata',
                     errnote='Unable to download JSON metadata',
                     expected_status=None):
        """Request ``path`` from the selected API host and return its data.

        An API host is selected first if none has been chosen yet. The
        response is unwrapped with _get_response_data(). A JSON parsing
        failure is reported as an expected error asking the user to retry;
        any other ExtractorError propagates unchanged.
        """
        if self._API_BASE is None:
            self._select_api_base()
        try:
            response = super(AudiusBaseIE, self)._download_json(
                '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
                errnote=errnote, expected_status=expected_status)
        except ExtractorError as exc:
            # some of Audius API hosts may not work as expected and return HTML
            if 'Failed to parse JSON' in compat_str(exc):
                raise ExtractorError('An error occurred while receiving data. Try again',
                                     expected=True)
            # re-raise the active exception as-is
            raise
        return self._get_response_data(response)

    def _resolve_url(self, url, item_id):
        """Resolve an audius.co page url through the API's /resolve endpoint.

        HTTP 404 is passed as an expected status to the request.
        """
        return self._api_request('/resolve?url=%s' % url, item_id,
                                 expected_status=404)
74
75
class AudiusIE(AudiusBaseIE):
    """Extractor for single tracks addressed by their audius.co page URL."""

    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:audius\.co/(?P<uploader>[\w\d-]+)(?!/album|/playlist)/(?P<title>\S+))'''
    IE_DESC = 'Audius.co'
    _TESTS = [
        {
            # URL from Chrome address bar which replace backslash to forward slash
            'url': 'https://audius.co/test_acc/t%D0%B5%D0%B5%D0%B5est-1.%5E_%7B%7D/%22%3C%3E.%E2%84%96~%60-198631',
            'md5': '92c35d3e754d5a0f17eef396b0d33582',
            'info_dict': {
                'id': 'xd8gY',
                'title': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
                'ext': 'mp3',
                'description': 'Description',
                'duration': 30,
                'track': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
                'artist': 'test',
                'genre': 'Electronic',
                'thumbnail': r're:https?://.*\.jpg',
                'view_count': int,
                'like_count': int,
                'repost_count': int,
            }
        },
        {
            # Regular track
            'url': 'https://audius.co/voltra/radar-103692',
            'md5': '491898a0a8de39f20c5d6a8a80ab5132',
            'info_dict': {
                'id': 'KKdy2',
                'title': 'RADAR',
                'ext': 'mp3',
                'duration': 318,
                'track': 'RADAR',
                'artist': 'voltra',
                'genre': 'Trance',
                'thumbnail': r're:https?://.*\.jpg',
                'view_count': int,
                'like_count': int,
                'repost_count': int,
            }
        },
    ]

    # Maps the artwork size keys of the API response to thumbnail preferences
    _ARTWORK_MAP = {
        "150x150": 150,
        "480x480": 480,
        "1000x1000": 1000
    }

    def _real_extract(self, url):
        """Return the info dict for a single track.

        A URL whose pattern captured a ``track_id`` group is fetched directly
        from the tracks endpoint; any other URL is resolved through the API
        first. Raises ExtractorError when the response is not a dict or has
        no track ID.
        """
        mobj = self._match_valid_url(url)
        # This class's own pattern has no 'track_id' group; the lookup is
        # wrapped in try_get() so a missing group ends up as None here
        track_id = try_get(mobj, lambda x: x.group('track_id'))
        if track_id is None:
            title = mobj.group('title')
            url = self._prepare_url(url, title)
            track_data = self._resolve_url(url, title)
        else:  # API link
            title = None
            track_data = self._api_request('/tracks/%s' % track_id, track_id)

        if not isinstance(track_data, dict):
            raise ExtractorError('Unexpected API response')

        track_id = track_data.get('id')
        if track_id is None:
            raise ExtractorError('Unable to get ID of the track')

        artworks_data = track_data.get('artwork')
        thumbnails = []
        if isinstance(artworks_data, dict):
            for quality_key, thumbnail_url in artworks_data.items():
                thumbnail = {
                    "url": thumbnail_url
                }
                # Sizes missing from _ARTWORK_MAP are kept without a preference
                quality_code = self._ARTWORK_MAP.get(quality_key)
                if quality_code is not None:
                    thumbnail['preference'] = quality_code
                thumbnails.append(thumbnail)

        return {
            'id': track_id,
            'title': track_data.get('title', title),
            # Use the same version prefix as every other API request instead
            # of a second hard-coded copy of it
            'url': '%s%s/tracks/%s/stream' % (self._API_BASE, self._API_V, track_id),
            'ext': 'mp3',
            'description': track_data.get('description'),
            'duration': track_data.get('duration'),
            'track': track_data.get('title'),
            'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
            'genre': track_data.get('genre'),
            'thumbnails': thumbnails,
            'view_count': track_data.get('play_count'),
            'like_count': track_data.get('favorite_count'),
            'repost_count': track_data.get('repost_count'),
        }
172
173
class AudiusTrackIE(AudiusIE):
    """Extractor for "audius:"-prefixed track IDs and API track links.

    Defines no methods of its own: extraction is inherited from AudiusIE and
    only the URL matching differs.
    """

    IE_NAME = 'audius:track'
    IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'

    # Either a bare track ID or a full ".../v1/tracks/<id>" API link, both
    # behind the mandatory "audius:" prefix
    _VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''

    _TESTS = [{
        'url': 'audius:9RWlo',
        'only_matching': True,
    }, {
        'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
        'only_matching': True,
    }]
188
189
class AudiusPlaylistIE(AudiusBaseIE):
    """Extractor for audius.co album and playlist pages."""

    IE_NAME = 'audius:playlist'
    IE_DESC = 'Audius.co playlists'
    _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<uploader>[\w\d-]+)/(?:album|playlist)/(?P<title>\S+)'
    _TEST = {
        'url': 'https://audius.co/test_acc/playlist/test-playlist-22910',
        'info_dict': {
            'id': 'DNvjN',
            'title': 'test playlist',
            'description': 'Test description\n\nlol',
        },
        'playlist_count': 175,
    }

    def _build_playlist(self, tracks):
        """Turn an iterable of track dicts into a list of url_result entries.

        Each entry points at ``audius:<track id>`` and is handed to
        AudiusTrackIE. Raises ExtractorError on the first item that is not a
        dict or that has no usable ID.
        """
        def as_entry(item):
            if not isinstance(item, dict):
                raise ExtractorError('Unexpected API response')
            item_id = str_or_none(item.get('id'))
            if not item_id:
                raise ExtractorError('Unable to get track ID from playlist')
            return self.url_result(
                'audius:%s' % item_id,
                ie=AudiusTrackIE.ie_key(), video_id=item_id)

        return [as_entry(item) for item in tracks]

    def _real_extract(self, url):
        """Resolve the playlist page, fetch its tracks and return a playlist.

        Raises ExtractorError when the resolve response is not a one-element
        list holding a dict, when the playlist has no ID, or when the tracks
        response is not a list.
        """
        self._select_api_base()
        title = self._match_valid_url(url).group('title')
        resolved = self._resolve_url(self._prepare_url(url, title), title)

        # The resolve endpoint must yield exactly one playlist dict
        is_single = isinstance(resolved, list) and len(resolved) == 1
        if not (is_single and isinstance(resolved[0], dict)):
            raise ExtractorError('Unexpected API response')
        playlist_data = resolved[0]

        playlist_id = playlist_data.get('id')
        if playlist_id is None:
            raise ExtractorError('Unable to get playlist ID')

        tracks_path = '/playlists/%s/tracks' % playlist_id
        playlist_tracks = self._api_request(
            tracks_path, title,
            note='Downloading playlist tracks metadata',
            errnote='Unable to download playlist tracks metadata')
        if not isinstance(playlist_tracks, list):
            raise ExtractorError('Unexpected API response')

        playlist_name = playlist_data.get('playlist_name', title)
        description = playlist_data.get('description')
        return self.playlist_result(
            self._build_playlist(playlist_tracks), playlist_id,
            playlist_name, description)
247
248
class AudiusProfileIE(AudiusPlaylistIE):
    """Extractor for audius.co profile pages, returned as a playlist of the
    profile's tracks."""

    IE_NAME = 'audius:artist'
    IE_DESC = 'Audius.co profile/artist pages'
    # The dot after "www" has to be part of the optional group; without it
    # "www.audius.co" URLs are not matched at all
    _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<id>[^\/]+)/?(?:[?#]|$)'
    _TEST = {
        'url': 'https://audius.co/pzl/',
        'info_dict': {
            'id': 'ezRo7',
            'description': 'TAMALE\n\nContact: officialpzl@gmail.com',
            'title': 'pzl',
        },
        'playlist_count': 24,
    }

    def _real_extract(self, url):
        """Fetch the profile and its tracks and return them as a playlist.

        The playlist ID is the profile's API ID, its title is the handle
        taken from the URL and its description is the profile bio. Raises
        ExtractorError when the profile cannot be downloaded or when a
        response does not have the expected shape.
        """
        self._select_api_base()
        profile_id = self._match_id(url)
        try:
            _profile_data = self._api_request('/full/users/handle/' + profile_id, profile_id)
        except ExtractorError as e:
            raise ExtractorError('Could not download profile info; ' + str(e))

        # The profile is read from the first element of the response; report
        # a malformed response as an ExtractorError, as the sibling extractors do
        profile = try_get(_profile_data, lambda x: x[0], dict)
        if profile is None:
            raise ExtractorError('Unexpected API response')
        profile_audius_id = profile.get('id')
        if profile_audius_id is None:
            raise ExtractorError('Unable to get profile ID')
        profile_bio = profile.get('bio')

        api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
        if not isinstance(api_call, list):
            raise ExtractorError('Unexpected API response')
        return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)