]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/audius.py
[youtube] Sort audio-only formats correctly
[yt-dlp.git] / yt_dlp / extractor / audius.py
CommitLineData
caa15a7b 1# coding: utf-8
2from __future__ import unicode_literals
3
4import random
5import re
6
7from .common import InfoExtractor
8from ..utils import ExtractorError, try_get, compat_str, str_or_none
9from ..compat import compat_urllib_parse_unquote
10
11
class AudiusBaseIE(InfoExtractor):
    """Shared helpers for the Audius extractors.

    An API host is chosen at random from the list served by
    https://api.audius.co/ and remembered in ``_API_BASE``; every API path is
    prefixed with ``_API_V``.
    """

    # Base URL of the selected API host; None until _select_api_base() has run
    _API_BASE = None
    # Version prefix inserted between the API host and the request path
    _API_V = '/v1'

    def _get_response_data(self, response):
        """Return the 'data' member of a decoded API response.

        Raises ExtractorError (marked as expected) when the response consists
        solely of a 'message' member, and a generic ExtractorError for any
        other response that has no non-null 'data'.
        """
        if isinstance(response, dict):
            response_data = response.get('data')
            if response_data is not None:
                return response_data
            if len(response) == 1 and 'message' in response:
                raise ExtractorError('API error: %s' % response['message'],
                                     expected=True)
        raise ExtractorError('Unexpected API response')

    def _select_api_base(self):
        """Select one of the currently available API hosts at random.

        Stores the chosen host in ``self._API_BASE``.
        Raises ExtractorError when the host list is missing or empty.
        """
        response = super(AudiusBaseIE, self)._download_json(
            'https://api.audius.co/', None,
            note='Requesting available API hosts',
            errnote='Unable to request available API hosts')
        hosts = self._get_response_data(response)
        # random.choice() raises IndexError on an empty sequence, so an empty
        # list has to take the ExtractorError path below as well
        if isinstance(hosts, list) and hosts:
            self._API_BASE = random.choice(hosts)
            return
        raise ExtractorError('Unable to get available API hosts')

    @staticmethod
    def _prepare_url(url, title):
        """Return the URL with forward slashes inside the title escaped.

        Audius removes forward slashes from the URI but leaves backslashes.
        The current version of Chrome replaces backslashes in the address bar
        with forward slashes, so a link copied from there cannot be resolved
        by the Audius API. To compensate, both arguments are percent-decoded
        and every '/' (or literal '%2F') found in the title is replaced with
        '%5C' (an encoded backslash) inside the URL.
        """
        url = compat_urllib_parse_unquote(url)
        title = compat_urllib_parse_unquote(title)
        if '/' in title or '%2F' in title:
            fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
            return url.replace(title, fixed_title)
        return url

    def _api_request(self, path, item_id=None, note='Downloading JSON metadata',
                     errnote='Unable to download JSON metadata',
                     expected_status=None):
        """Request ``path`` from the selected API host and return its 'data'.

        An API host is selected first if none has been chosen yet. A JSON
        parsing failure is re-raised as an expected ExtractorError asking the
        user to retry; any other ExtractorError propagates unchanged.
        """
        if self._API_BASE is None:
            self._select_api_base()
        try:
            response = super(AudiusBaseIE, self)._download_json(
                '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
                errnote=errnote, expected_status=expected_status)
        except ExtractorError as exc:
            # some of Audius API hosts may not work as expected and return HTML
            if 'Failed to parse JSON' in compat_str(exc):
                raise ExtractorError('An error occurred while receiving data. Try again',
                                     expected=True)
            # bare raise keeps the original traceback intact
            raise
        return self._get_response_data(response)

    def _resolve_url(self, url, item_id):
        """Ask the API's /resolve endpoint for the object behind ``url``.

        HTTP 404 is passed as an expected status, so the body of such a
        response is still handed to _get_response_data().
        """
        return self._api_request('/resolve?url=%s' % url, item_id,
                                 expected_status=404)
75
76
class AudiusIE(AudiusBaseIE):
    """Extractor for single tracks on audius.co.

    Subclasses whose _VALID_URL defines a 'track_id' group are served by the
    same _real_extract() through its direct API lookup branch.
    """

    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:audius\.co/(?P<uploader>[\w\d-]+)(?!/album|/playlist)/(?P<title>\S+))'''
    IE_DESC = 'Audius.co'
    _TESTS = [
        {
            # URL from Chrome address bar which replace backslash to forward slash
            'url': 'https://audius.co/test_acc/t%D0%B5%D0%B5%D0%B5est-1.%5E_%7B%7D/%22%3C%3E.%E2%84%96~%60-198631',
            'md5': '92c35d3e754d5a0f17eef396b0d33582',
            'info_dict': {
                'id': 'xd8gY',
                'title': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
                'ext': 'mp3',
                'description': 'Description',
                'duration': 30,
                'track': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
                'artist': 'test',
                'genre': 'Electronic',
                'thumbnail': r're:https?://.*\.jpg',
                'view_count': int,
                'like_count': int,
                'repost_count': int,
            }
        },
        {
            # Regular track
            'url': 'https://audius.co/voltra/radar-103692',
            'md5': '491898a0a8de39f20c5d6a8a80ab5132',
            'info_dict': {
                'id': 'KKdy2',
                'title': 'RADAR',
                'ext': 'mp3',
                'duration': 318,
                'track': 'RADAR',
                'artist': 'voltra',
                'genre': 'Trance',
                'thumbnail': r're:https?://.*\.jpg',
                'view_count': int,
                'like_count': int,
                'repost_count': int,
            }
        },
    ]

    # Maps the artwork size keys of the API response to a thumbnail preference
    _ARTWORK_MAP = {
        "150x150": 150,
        "480x480": 480,
        "1000x1000": 1000
    }

    def _real_extract(self, url):
        """Build the info dict for one track.

        The track metadata comes either from resolving the page URL or, when
        the matched URL carries a 'track_id' group, from a direct
        /tracks/<id> API request.

        Raises ExtractorError when the API answer is not a dict or has no ID.
        """
        mobj = re.match(self._VALID_URL, url)
        # This class's own pattern has no 'track_id' group; try_get turns the
        # resulting lookup failure into None
        track_id = try_get(mobj, lambda x: x.group('track_id'))
        if track_id is None:
            title = mobj.group('title')
            url = self._prepare_url(url, title)
            track_data = self._resolve_url(url, title)
        else:  # API link
            title = None
            track_data = self._api_request('/tracks/%s' % track_id, track_id)

        if not isinstance(track_data, dict):
            raise ExtractorError('Unexpected API response')

        track_id = track_data.get('id')
        if track_id is None:
            raise ExtractorError('Unable to get ID of the track')

        artworks_data = track_data.get('artwork')
        thumbnails = []
        if isinstance(artworks_data, dict):
            for quality_key, thumbnail_url in artworks_data.items():
                # an artwork entry without a URL cannot serve as a thumbnail
                if not thumbnail_url:
                    continue
                thumbnail = {
                    "url": thumbnail_url
                }
                quality_code = self._ARTWORK_MAP.get(quality_key)
                if quality_code is not None:
                    thumbnail['preference'] = quality_code
                thumbnails.append(thumbnail)

        return {
            'id': track_id,
            # fall back to the URL title both when the key is absent and when
            # the API supplies a null/empty title
            'title': track_data.get('title') or title,
            # built from _API_V so it stays in step with _api_request()
            'url': '%s%s/tracks/%s/stream' % (self._API_BASE, self._API_V, track_id),
            'ext': 'mp3',
            'description': track_data.get('description'),
            'duration': track_data.get('duration'),
            'track': track_data.get('title'),
            'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
            'genre': track_data.get('genre'),
            'thumbnails': thumbnails,
            'view_count': track_data.get('play_count'),
            'like_count': track_data.get('favorite_count'),
            'repost_count': track_data.get('repost_count'),
        }
173
174
class AudiusTrackIE(AudiusIE):
    """Extractor for ``audius:``-prefixed track IDs and API track links.

    No methods are defined here; extraction is inherited from AudiusIE, and
    only the URL pattern, names and tests differ.
    """

    IE_NAME = 'audius:track'
    IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'
    # The API link part is optional, so a bare ID after "audius:" matches too
    _VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''
    _TESTS = [{
        'url': 'audius:9RWlo',
        'only_matching': True,
    }, {
        'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
        'only_matching': True,
    }]
189
190
class AudiusPlaylistIE(AudiusBaseIE):
    """Extractor for album and playlist pages on audius.co."""

    IE_NAME = 'audius:playlist'
    IE_DESC = 'Audius.co playlists'
    _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<uploader>[\w\d-]+)/(?:album|playlist)/(?P<title>\S+)'
    _TEST = {
        'url': 'https://audius.co/test_acc/playlist/test-playlist-22910',
        'info_dict': {
            'id': 'DNvjN',
            'title': 'test playlist',
            'description': 'Test description\n\nlol',
        },
        'playlist_count': 175,
    }

    def _build_playlist(self, tracks):
        """Convert the API's track list into url_result entries.

        Each entry points at 'audius:<id>' and is assigned to AudiusTrackIE.
        Raises ExtractorError as soon as an item is not a dict or has no
        usable ID.
        """
        def as_entry(item):
            if not isinstance(item, dict):
                raise ExtractorError('Unexpected API response')
            item_id = str_or_none(item.get('id'))
            if not item_id:
                raise ExtractorError('Unable to get track ID from playlist')
            return self.url_result(
                'audius:%s' % item_id,
                ie=AudiusTrackIE.ie_key(), video_id=item_id)

        return [as_entry(item) for item in tracks]

    def _real_extract(self, url):
        """Resolve the playlist URL, fetch its tracks and return a playlist result.

        Raises ExtractorError when any API answer has an unexpected shape or
        the playlist ID is missing.
        """
        # An API host is (re)selected on every playlist extraction
        self._select_api_base()
        slug = re.match(self._VALID_URL, url).group('title')
        resolved = self._resolve_url(self._prepare_url(url, slug), slug)

        # The resolve answer has to be a list holding exactly one playlist
        if not (isinstance(resolved, list) and len(resolved) == 1):
            raise ExtractorError('Unexpected API response')

        info = resolved[0]
        if not isinstance(info, dict):
            raise ExtractorError('Unexpected API response')

        playlist_id = info.get('id')
        if playlist_id is None:
            raise ExtractorError('Unable to get playlist ID')

        tracks = self._api_request(
            '/playlists/%s/tracks' % playlist_id, slug,
            note='Downloading playlist tracks metadata',
            errnote='Unable to download playlist tracks metadata')
        if not isinstance(tracks, list):
            raise ExtractorError('Unexpected API response')

        return self.playlist_result(
            self._build_playlist(tracks), playlist_id,
            info.get('playlist_name', slug),
            info.get('description'))