yt_dlp/extractor/audius.py

   1 import random
   2 import urllib.parse
   3
   4 from .common import InfoExtractor
   5 from ..utils import ExtractorError, str_or_none, try_get
   6
   7
   8 class AudiusBaseIE(InfoExtractor):
   9     _API_BASE = None
  10     _API_V = '/v1'
  11
  12     def _get_response_data(self, response):
  13         if isinstance(response, dict):
  14             response_data = response.get('data')
  15             if response_data is not None:
  16                 return response_data
  17             if len(response) == 1 and 'message' in response:
  18                 raise ExtractorError('API error: {}'.format(response['message']),
  19                                      expected=True)
  20         raise ExtractorError('Unexpected API response')
  21
  22     def _select_api_base(self):
  23         """Selecting one of the currently available API hosts"""
  24         response = super()._download_json(
  25             'https://api.audius.co/', None,
  26             note='Requesting available API hosts',
  27             errnote='Unable to request available API hosts')
  28         hosts = self._get_response_data(response)
  29         if isinstance(hosts, list):
  30             self._API_BASE = random.choice(hosts)
  31             return
  32         raise ExtractorError('Unable to get available API hosts')
  33
  34     @staticmethod
  35     def _prepare_url(url, title):
  36         """
  37         Audius removes forward slashes from the uri, but leaves backslashes.
  38         The problem is that the current version of Chrome replaces backslashes
  39         in the address bar with a forward slashes, so if you copy the link from
  40         there and paste it into youtube-dl, you won't be able to download
  41         anything from this link, since the Audius API won't be able to resolve
  42         this url
  43         """
  44         url = urllib.parse.unquote(url)
  45         title = urllib.parse.unquote(title)
  46         if '/' in title or '%2F' in title:
  47             fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
  48             return url.replace(title, fixed_title)
  49         return url
  50
  51     def _api_request(self, path, item_id=None, note='Downloading JSON metadata',
  52                      errnote='Unable to download JSON metadata',
  53                      expected_status=None):
  54         if self._API_BASE is None:
  55             self._select_api_base()
  56         try:
  57             response = super()._download_json(
  58                 f'{self._API_BASE}{self._API_V}{path}', item_id, note=note,
  59                 errnote=errnote, expected_status=expected_status)
  60         except ExtractorError as exc:
  61             # some of Audius API hosts may not work as expected and return HTML
  62             if 'Failed to parse JSON' in str(exc):
  63                 raise ExtractorError('An error occurred while receiving data. Try again',
  64                                      expected=True)
  65             raise exc
  66         return self._get_response_data(response)
  67
  68     def _resolve_url(self, url, item_id):
  69         return self._api_request(f'/resolve?url={url}', item_id,
  70                                  expected_status=404)
  71
  72
  73 class AudiusIE(AudiusBaseIE):
  74     _VALID_URL = r'''(?x)https?://(?:www\.)?(?:audius\.co/(?P<uploader>[\w\d-]+)(?!/album|/playlist)/(?P<title>\S+))'''
  75     IE_DESC = 'Audius.co'
  76     _TESTS = [
  77         {
  78             # URL from Chrome address bar which replace backslash to forward slash
  79             'url': 'https://audius.co/test_acc/t%D0%B5%D0%B5%D0%B5est-1.%5E_%7B%7D/%22%3C%3E.%E2%84%96~%60-198631',
  80             'md5': '92c35d3e754d5a0f17eef396b0d33582',
  81             'info_dict': {
  82                 'id': 'xd8gY',
  83                 'title': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
  84                 'ext': 'mp3',
  85                 'description': 'Description',
  86                 'duration': 30,
  87                 'track': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
  88                 'artist': 'test',
  89                 'genre': 'Electronic',
  90                 'thumbnail': r're:https?://.*\.jpg',
  91                 'view_count': int,
  92                 'like_count': int,
  93                 'repost_count': int,
  94             },
  95         },
  96         {
  97             # Regular track
  98             'url': 'https://audius.co/voltra/radar-103692',
  99             'md5': '491898a0a8de39f20c5d6a8a80ab5132',
 100             'info_dict': {
 101                 'id': 'KKdy2',
 102                 'title': 'RADAR',
 103                 'ext': 'mp3',
 104                 'duration': 318,
 105                 'track': 'RADAR',
 106                 'artist': 'voltra',
 107                 'genre': 'Trance',
 108                 'thumbnail': r're:https?://.*\.jpg',
 109                 'view_count': int,
 110                 'like_count': int,
 111                 'repost_count': int,
 112             },
 113         },
 114     ]
 115
 116     _ARTWORK_MAP = {
 117         '150x150': 150,
 118         '480x480': 480,
 119         '1000x1000': 1000,
 120     }
 121
 122     def _real_extract(self, url):
 123         mobj = self._match_valid_url(url)
 124         track_id = try_get(mobj, lambda x: x.group('track_id'))
 125         if track_id is None:
 126             title = mobj.group('title')
 127             # uploader = mobj.group('uploader')
 128             url = self._prepare_url(url, title)
 129             track_data = self._resolve_url(url, title)
 130         else:  # API link
 131             title = None
 132             # uploader = None
 133             track_data = self._api_request(f'/tracks/{track_id}', track_id)
 134
 135         if not isinstance(track_data, dict):
 136             raise ExtractorError('Unexpected API response')
 137
 138         track_id = track_data.get('id')
 139         if track_id is None:
 140             raise ExtractorError('Unable to get ID of the track')
 141
 142         artworks_data = track_data.get('artwork')
 143         thumbnails = []
 144         if isinstance(artworks_data, dict):
 145             for quality_key, thumbnail_url in artworks_data.items():
 146                 thumbnail = {
 147                     'url': thumbnail_url,
 148                 }
 149                 quality_code = self._ARTWORK_MAP.get(quality_key)
 150                 if quality_code is not None:
 151                     thumbnail['preference'] = quality_code
 152                 thumbnails.append(thumbnail)
 153
 154         return {
 155             'id': track_id,
 156             'title': track_data.get('title', title),
 157             'url': f'{self._API_BASE}/v1/tracks/{track_id}/stream',
 158             'ext': 'mp3',
 159             'description': track_data.get('description'),
 160             'duration': track_data.get('duration'),
 161             'track': track_data.get('title'),
 162             'artist': try_get(track_data, lambda x: x['user']['name'], str),
 163             'genre': track_data.get('genre'),
 164             'thumbnails': thumbnails,
 165             'view_count': track_data.get('play_count'),
 166             'like_count': track_data.get('favorite_count'),
 167             'repost_count': track_data.get('repost_count'),
 168         }
 169
 170
 171 class AudiusTrackIE(AudiusIE):  # XXX: Do not subclass from concrete IE
 172     _VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''
 173     IE_NAME = 'audius:track'
 174     IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'
 175     _TESTS = [
 176         {
 177             'url': 'audius:9RWlo',
 178             'only_matching': True,
 179         },
 180         {
 181             'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
 182             'only_matching': True,
 183         },
 184     ]
 185
 186
 187 class AudiusPlaylistIE(AudiusBaseIE):
 188     _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<uploader>[\w\d-]+)/(?:album|playlist)/(?P<title>\S+)'
 189     IE_NAME = 'audius:playlist'
 190     IE_DESC = 'Audius.co playlists'
 191     _TEST = {
 192         'url': 'https://audius.co/test_acc/playlist/test-playlist-22910',
 193         'info_dict': {
 194             'id': 'DNvjN',
 195             'title': 'test playlist',
 196             'description': 'Test description\n\nlol',
 197         },
 198         'playlist_count': 175,
 199     }
 200
 201     def _build_playlist(self, tracks):
 202         entries = []
 203         for track in tracks:
 204             if not isinstance(track, dict):
 205                 raise ExtractorError('Unexpected API response')
 206             track_id = str_or_none(track.get('id'))
 207             if not track_id:
 208                 raise ExtractorError('Unable to get track ID from playlist')
 209             entries.append(self.url_result(
 210                 f'audius:{track_id}',
 211                 ie=AudiusTrackIE.ie_key(), video_id=track_id))
 212         return entries
 213
 214     def _real_extract(self, url):
 215         self._select_api_base()
 216         mobj = self._match_valid_url(url)
 217         title = mobj.group('title')
 218         # uploader = mobj.group('uploader')
 219         url = self._prepare_url(url, title)
 220         playlist_response = self._resolve_url(url, title)
 221
 222         if not isinstance(playlist_response, list) or len(playlist_response) != 1:
 223             raise ExtractorError('Unexpected API response')
 224
 225         playlist_data = playlist_response[0]
 226         if not isinstance(playlist_data, dict):
 227             raise ExtractorError('Unexpected API response')
 228
 229         playlist_id = playlist_data.get('id')
 230         if playlist_id is None:
 231             raise ExtractorError('Unable to get playlist ID')
 232
 233         playlist_tracks = self._api_request(
 234             f'/playlists/{playlist_id}/tracks',
 235             title, note='Downloading playlist tracks metadata',
 236             errnote='Unable to download playlist tracks metadata')
 237         if not isinstance(playlist_tracks, list):
 238             raise ExtractorError('Unexpected API response')
 239
 240         entries = self._build_playlist(playlist_tracks)
 241         return self.playlist_result(entries, playlist_id,
 242                                     playlist_data.get('playlist_name', title),
 243                                     playlist_data.get('description'))
 244
 245
 246 class AudiusProfileIE(AudiusPlaylistIE):  # XXX: Do not subclass from concrete IE
 247     IE_NAME = 'audius:artist'
 248     IE_DESC = 'Audius.co profile/artist pages'
 249     _VALID_URL = r'https?://(?:www)?audius\.co/(?P<id>[^\/]+)/?(?:[?#]|$)'
 250     _TEST = {
 251         'url': 'https://audius.co/pzl/',
 252         'info_dict': {
 253             'id': 'ezRo7',
 254             'description': 'TAMALE\n\nContact: officialpzl@gmail.com',
 255             'title': 'pzl',
 256         },
 257         'playlist_count': 24,
 258     }
 259
 260     def _real_extract(self, url):
 261         self._select_api_base()
 262         profile_id = self._match_id(url)
 263         try:
 264             _profile_data = self._api_request('/full/users/handle/' + profile_id, profile_id)
 265         except ExtractorError as e:
 266             raise ExtractorError('Could not download profile info; ' + str(e))
 267         profile_audius_id = _profile_data[0]['id']
 268         profile_bio = _profile_data[0].get('bio')
 269
 270         api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id)
 271         return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)