jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/audius.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / audius.py
1 import random
2 import urllib.parse
3
4 from .common import InfoExtractor
5 from ..utils import ExtractorError, str_or_none, try_get
6
7
class AudiusBaseIE(InfoExtractor):
    """Shared plumbing for the Audius extractors: API host selection and JSON requests."""

    # Base URL of the API host in use; stays None until _select_api_base() picks one
    _API_BASE = None
    # Version path segment inserted between the host and the endpoint path
    _API_V = '/v1'

    def _get_response_data(self, response):
        """
        Return the value stored under 'data' in an API response.

        Raises an expected ExtractorError carrying the API message when the
        response is a dict whose only key is 'message', and a generic
        ExtractorError for any other response without a usable 'data' value.
        """
        if isinstance(response, dict):
            response_data = response.get('data')
            if response_data is not None:
                return response_data
            if len(response) == 1 and 'message' in response:
                raise ExtractorError('API error: {}'.format(response['message']),
                                     expected=True)
        raise ExtractorError('Unexpected API response')

    def _select_api_base(self):
        """
        Pick one of the currently available API hosts at random and store it
        in self._API_BASE.

        Raises ExtractorError if the host list is missing, is not a list or
        is empty.
        """
        response = super()._download_json(
            'https://api.audius.co/', None,
            note='Requesting available API hosts',
            errnote='Unable to request available API hosts')
        hosts = self._get_response_data(response)
        # random.choice() raises IndexError on an empty sequence, so an empty
        # host list must take the error path below instead
        if isinstance(hosts, list) and hosts:
            self._API_BASE = random.choice(hosts)
            return
        raise ExtractorError('Unable to get available API hosts')

    @staticmethod
    def _prepare_url(url, title):
        """
        Audius removes forward slashes from the URI, but leaves backslashes.
        The problem is that the current version of Chrome replaces backslashes
        in the address bar with forward slashes, so a link copied from there
        cannot be resolved by the Audius API as-is.

        Both arguments are percent-decoded first; every forward slash found in
        the title part is then replaced with an encoded backslash ('%5C') in
        the returned URL. The decoded URL is returned unchanged when the title
        contains no slash.
        """
        url = urllib.parse.unquote(url)
        title = urllib.parse.unquote(title)
        # After unquoting, a literal '%2F' can only be left over from a
        # double-encoded slash in the input
        if '/' in title or '%2F' in title:
            fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
            return url.replace(title, fixed_title)
        return url

    def _api_request(self, path, item_id=None, note='Downloading JSON metadata',
                     errnote='Unable to download JSON metadata',
                     expected_status=None):
        """
        Request `path` from the selected API host and return the 'data' part
        of the JSON response.

        An API host is selected first if none has been chosen yet. A JSON
        parsing failure is reported as an expected error asking the user to
        retry; any other ExtractorError propagates unchanged.
        """
        if self._API_BASE is None:
            self._select_api_base()
        try:
            response = super()._download_json(
                f'{self._API_BASE}{self._API_V}{path}', item_id, note=note,
                errnote=errnote, expected_status=expected_status)
        except ExtractorError as exc:
            # some of Audius API hosts may not work as expected and return HTML
            if 'Failed to parse JSON' in str(exc):
                raise ExtractorError('An error occurred while receiving data. Try again',
                                     expected=True) from exc
            raise
        return self._get_response_data(response)

    def _resolve_url(self, url, item_id):
        """Ask the API's /resolve endpoint for the object behind a site URL."""
        # NOTE(review): 404 is passed as an expected status, presumably so the
        # JSON error body still reaches _get_response_data() - confirm
        return self._api_request(f'/resolve?url={url}', item_id,
                                 expected_status=404)
71
72
class AudiusIE(AudiusBaseIE):
    """Extractor for single tracks addressed by their audius.co page URL."""

    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:audius\.co/(?P<uploader>[\w\d-]+)(?!/album|/playlist)/(?P<title>\S+))'''
    IE_DESC = 'Audius.co'
    _TESTS = [
        {
            # URL from Chrome address bar which replace backslash to forward slash
            'url': 'https://audius.co/test_acc/t%D0%B5%D0%B5%D0%B5est-1.%5E_%7B%7D/%22%3C%3E.%E2%84%96~%60-198631',
            'md5': '92c35d3e754d5a0f17eef396b0d33582',
            'info_dict': {
                'id': 'xd8gY',
                'title': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
                'ext': 'mp3',
                'description': 'Description',
                'duration': 30,
                'track': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
                'artist': 'test',
                'genre': 'Electronic',
                'thumbnail': r're:https?://.*\.jpg',
                'view_count': int,
                'like_count': int,
                'repost_count': int,
            },
        },
        {
            # Regular track
            'url': 'https://audius.co/voltra/radar-103692',
            'md5': '491898a0a8de39f20c5d6a8a80ab5132',
            'info_dict': {
                'id': 'KKdy2',
                'title': 'RADAR',
                'ext': 'mp3',
                'duration': 318,
                'track': 'RADAR',
                'artist': 'voltra',
                'genre': 'Trance',
                'thumbnail': r're:https?://.*\.jpg',
                'view_count': int,
                'like_count': int,
                'repost_count': int,
            },
        },
    ]

    # Maps the artwork size keys of the API to a numeric thumbnail preference
    _ARTWORK_MAP = {
        '150x150': 150,
        '480x480': 480,
        '1000x1000': 1000,
    }

    def _real_extract(self, url):
        """
        Build the info dict of a single track.

        The track is looked up by ID when the matched URL pattern provides a
        'track_id' group (as AudiusTrackIE's pattern does); otherwise the
        page URL is resolved through the API.

        Raises ExtractorError if the API answer is not a dict or has no ID.
        """
        mobj = self._match_valid_url(url)
        # This class's own pattern has no 'track_id' group; try_get is relied
        # on to turn the failed group lookup into None
        track_id = try_get(mobj, lambda x: x.group('track_id'))
        if track_id is None:
            title = mobj.group('title')
            url = self._prepare_url(url, title)
            track_data = self._resolve_url(url, title)
        else:  # API link
            title = None
            track_data = self._api_request(f'/tracks/{track_id}', track_id)

        if not isinstance(track_data, dict):
            raise ExtractorError('Unexpected API response')

        track_id = track_data.get('id')
        if track_id is None:
            raise ExtractorError('Unable to get ID of the track')

        artworks_data = track_data.get('artwork')
        thumbnails = []
        if isinstance(artworks_data, dict):
            for quality_key, thumbnail_url in artworks_data.items():
                # A thumbnail needs a URL string; skip null or structured values
                if not isinstance(thumbnail_url, str):
                    continue
                thumbnail = {
                    'url': thumbnail_url,
                }
                quality_code = self._ARTWORK_MAP.get(quality_key)
                if quality_code is not None:
                    thumbnail['preference'] = quality_code
                thumbnails.append(thumbnail)

        return {
            'id': track_id,
            # Fall back to the URL slug also when the API title is null or empty
            'title': track_data.get('title') or title,
            # Use the shared version constant like every other API request does
            'url': f'{self._API_BASE}{self._API_V}/tracks/{track_id}/stream',
            'ext': 'mp3',
            'description': track_data.get('description'),
            'duration': track_data.get('duration'),
            'track': track_data.get('title'),
            'artist': try_get(track_data, lambda x: x['user']['name'], str),
            'genre': track_data.get('genre'),
            'thumbnails': thumbnails,
            'view_count': track_data.get('play_count'),
            'like_count': track_data.get('favorite_count'),
            'repost_count': track_data.get('repost_count'),
        }
169
170
class AudiusTrackIE(AudiusIE):  # XXX: Do not subclass from concrete IE
    """Variant of AudiusIE matching 'audius:'-prefixed track IDs and API track links."""

    IE_NAME = 'audius:track'
    IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'
    # The 'track_id' group is what the inherited _real_extract() looks for to
    # fetch the track by ID instead of resolving a page URL
    _VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''
    _TESTS = [{
        'url': 'audius:9RWlo',
        'only_matching': True,
    }, {
        'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
        'only_matching': True,
    }]
185
186
class AudiusPlaylistIE(AudiusBaseIE):
    """Extractor for album and playlist pages on audius.co."""

    IE_NAME = 'audius:playlist'
    IE_DESC = 'Audius.co playlists'
    _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<uploader>[\w\d-]+)/(?:album|playlist)/(?P<title>\S+)'
    _TEST = {
        'url': 'https://audius.co/test_acc/playlist/test-playlist-22910',
        'info_dict': {
            'id': 'DNvjN',
            'title': 'test playlist',
            'description': 'Test description\n\nlol',
        },
        'playlist_count': 175,
    }

    def _build_playlist(self, tracks):
        """
        Convert track objects into url_result entries handled by AudiusTrackIE.

        Raises ExtractorError on the first item that is not a dict or that
        carries no usable ID.
        """

        def make_entry(item):
            # Validate a single track object and wrap its ID in an 'audius:' link
            if not isinstance(item, dict):
                raise ExtractorError('Unexpected API response')
            item_id = str_or_none(item.get('id'))
            if not item_id:
                raise ExtractorError('Unable to get track ID from playlist')
            return self.url_result(
                f'audius:{item_id}',
                ie=AudiusTrackIE.ie_key(), video_id=item_id)

        return [make_entry(item) for item in tracks]

    def _real_extract(self, url):
        """
        Resolve a playlist/album URL and return a playlist result of its tracks.

        Raises ExtractorError when the resolved object is not a one-element
        list holding a dict, when the playlist has no ID, or when the track
        listing is not a list.
        """
        self._select_api_base()
        slug = self._match_valid_url(url).group('title')
        resolved = self._resolve_url(self._prepare_url(url, slug), slug)

        # The API is expected to answer with exactly one playlist object
        holds_single_dict = (
            isinstance(resolved, list)
            and len(resolved) == 1
            and isinstance(resolved[0], dict))
        if not holds_single_dict:
            raise ExtractorError('Unexpected API response')

        playlist_data = resolved[0]
        playlist_id = playlist_data.get('id')
        if playlist_id is None:
            raise ExtractorError('Unable to get playlist ID')

        track_list = self._api_request(
            f'/playlists/{playlist_id}/tracks', slug,
            note='Downloading playlist tracks metadata',
            errnote='Unable to download playlist tracks metadata')
        if not isinstance(track_list, list):
            raise ExtractorError('Unexpected API response')

        return self.playlist_result(
            self._build_playlist(track_list), playlist_id,
            playlist_data.get('playlist_name', slug),
            playlist_data.get('description'))
244
245
class AudiusProfileIE(AudiusPlaylistIE):  # XXX: Do not subclass from concrete IE
    """Extractor turning an artist/profile page into a playlist of the artist's tracks."""

    IE_NAME = 'audius:artist'
    IE_DESC = 'Audius.co profile/artist pages'
    # The optional host prefix needs the literal dot ('www\.'); without it
    # 'https://www.audius.co/...' URLs could never match
    _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<id>[^\/]+)/?(?:[?#]|$)'
    _TEST = {
        'url': 'https://audius.co/pzl/',
        'info_dict': {
            'id': 'ezRo7',
            'description': 'TAMALE\n\nContact: officialpzl@gmail.com',
            'title': 'pzl',
        },
        'playlist_count': 24,
    }

    def _real_extract(self, url):
        """
        Return a playlist result with all tracks of the profile in `url`.

        The playlist ID is the profile's API ID, its title is the handle taken
        from the URL and its description is the profile bio.

        Raises ExtractorError if the profile cannot be downloaded, if the
        profile answer is not a non-empty list starting with a dict that has
        an ID, or if the track listing is not a list.
        """
        self._select_api_base()
        profile_id = self._match_id(url)
        try:
            _profile_data = self._api_request('/full/users/handle/' + profile_id, profile_id)
        except ExtractorError as e:
            # Chain the original error so its traceback is not lost
            raise ExtractorError('Could not download profile info; ' + str(e)) from e

        # The profile object is read from the first list element; fail with a
        # clear message rather than an IndexError/KeyError/TypeError
        if (not isinstance(_profile_data, list) or not _profile_data
                or not isinstance(_profile_data[0], dict)):
            raise ExtractorError('Unexpected API response')
        profile = _profile_data[0]
        profile_audius_id = profile.get('id')
        if profile_audius_id is None:
            raise ExtractorError('Unable to get profile ID')
        profile_bio = profile.get('bio')

        api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id)
        if not isinstance(api_call, list):
            raise ExtractorError('Unexpected API response')
        return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)