yt_dlp/extractor/audius.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import random
   5
   6 from .common import InfoExtractor
   7 from ..utils import ExtractorError, try_get, compat_str, str_or_none
   8 from ..compat import compat_urllib_parse_unquote
   9
  10
  11 class AudiusBaseIE(InfoExtractor):
  12     _API_BASE = None
  13     _API_V = '/v1'
  14
  15     def _get_response_data(self, response):
  16         if isinstance(response, dict):
  17             response_data = response.get('data')
  18             if response_data is not None:
  19                 return response_data
  20             if len(response) == 1 and 'message' in response:
  21                 raise ExtractorError('API error: %s' % response['message'],
  22                                      expected=True)
  23         raise ExtractorError('Unexpected API response')
  24
  25     def _select_api_base(self):
  26         """Selecting one of the currently available API hosts"""
  27         response = super(AudiusBaseIE, self)._download_json(
  28             'https://api.audius.co/', None,
  29             note='Requesting available API hosts',
  30             errnote='Unable to request available API hosts')
  31         hosts = self._get_response_data(response)
  32         if isinstance(hosts, list):
  33             self._API_BASE = random.choice(hosts)
  34             return
  35         raise ExtractorError('Unable to get available API hosts')
  36
  37     @staticmethod
  38     def _prepare_url(url, title):
  39         """
  40         Audius removes forward slashes from the uri, but leaves backslashes.
  41         The problem is that the current version of Chrome replaces backslashes
  42         in the address bar with a forward slashes, so if you copy the link from
  43         there and paste it into youtube-dl, you won't be able to download
  44         anything from this link, since the Audius API won't be able to resolve
  45         this url
  46         """
  47         url = compat_urllib_parse_unquote(url)
  48         title = compat_urllib_parse_unquote(title)
  49         if '/' in title or '%2F' in title:
  50             fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
  51             return url.replace(title, fixed_title)
  52         return url
  53
  54     def _api_request(self, path, item_id=None, note='Downloading JSON metadata',
  55                      errnote='Unable to download JSON metadata',
  56                      expected_status=None):
  57         if self._API_BASE is None:
  58             self._select_api_base()
  59         try:
  60             response = super(AudiusBaseIE, self)._download_json(
  61                 '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
  62                 errnote=errnote, expected_status=expected_status)
  63         except ExtractorError as exc:
  64             # some of Audius API hosts may not work as expected and return HTML
  65             if 'Failed to parse JSON' in compat_str(exc):
  66                 raise ExtractorError('An error occurred while receiving data. Try again',
  67                                      expected=True)
  68             raise exc
  69         return self._get_response_data(response)
  70
  71     def _resolve_url(self, url, item_id):
  72         return self._api_request('/resolve?url=%s' % url, item_id,
  73                                  expected_status=404)
  74
  75
  76 class AudiusIE(AudiusBaseIE):
  77     _VALID_URL = r'''(?x)https?://(?:www\.)?(?:audius\.co/(?P<uploader>[\w\d-]+)(?!/album|/playlist)/(?P<title>\S+))'''
  78     IE_DESC = 'Audius.co'
  79     _TESTS = [
  80         {
  81             # URL from Chrome address bar which replace backslash to forward slash
  82             'url': 'https://audius.co/test_acc/t%D0%B5%D0%B5%D0%B5est-1.%5E_%7B%7D/%22%3C%3E.%E2%84%96~%60-198631',
  83             'md5': '92c35d3e754d5a0f17eef396b0d33582',
  84             'info_dict': {
  85                 'id': 'xd8gY',
  86                 'title': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
  87                 'ext': 'mp3',
  88                 'description': 'Description',
  89                 'duration': 30,
  90                 'track': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
  91                 'artist': 'test',
  92                 'genre': 'Electronic',
  93                 'thumbnail': r're:https?://.*\.jpg',
  94                 'view_count': int,
  95                 'like_count': int,
  96                 'repost_count': int,
  97             }
  98         },
  99         {
 100             # Regular track
 101             'url': 'https://audius.co/voltra/radar-103692',
 102             'md5': '491898a0a8de39f20c5d6a8a80ab5132',
 103             'info_dict': {
 104                 'id': 'KKdy2',
 105                 'title': 'RADAR',
 106                 'ext': 'mp3',
 107                 'duration': 318,
 108                 'track': 'RADAR',
 109                 'artist': 'voltra',
 110                 'genre': 'Trance',
 111                 'thumbnail': r're:https?://.*\.jpg',
 112                 'view_count': int,
 113                 'like_count': int,
 114                 'repost_count': int,
 115             }
 116         },
 117     ]
 118
 119     _ARTWORK_MAP = {
 120         "150x150": 150,
 121         "480x480": 480,
 122         "1000x1000": 1000
 123     }
 124
 125     def _real_extract(self, url):
 126         mobj = self._match_valid_url(url)
 127         track_id = try_get(mobj, lambda x: x.group('track_id'))
 128         if track_id is None:
 129             title = mobj.group('title')
 130             # uploader = mobj.group('uploader')
 131             url = self._prepare_url(url, title)
 132             track_data = self._resolve_url(url, title)
 133         else:  # API link
 134             title = None
 135             # uploader = None
 136             track_data = self._api_request('/tracks/%s' % track_id, track_id)
 137
 138         if not isinstance(track_data, dict):
 139             raise ExtractorError('Unexpected API response')
 140
 141         track_id = track_data.get('id')
 142         if track_id is None:
 143             raise ExtractorError('Unable to get ID of the track')
 144
 145         artworks_data = track_data.get('artwork')
 146         thumbnails = []
 147         if isinstance(artworks_data, dict):
 148             for quality_key, thumbnail_url in artworks_data.items():
 149                 thumbnail = {
 150                     "url": thumbnail_url
 151                 }
 152                 quality_code = self._ARTWORK_MAP.get(quality_key)
 153                 if quality_code is not None:
 154                     thumbnail['preference'] = quality_code
 155                 thumbnails.append(thumbnail)
 156
 157         return {
 158             'id': track_id,
 159             'title': track_data.get('title', title),
 160             'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id),
 161             'ext': 'mp3',
 162             'description': track_data.get('description'),
 163             'duration': track_data.get('duration'),
 164             'track': track_data.get('title'),
 165             'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
 166             'genre': track_data.get('genre'),
 167             'thumbnails': thumbnails,
 168             'view_count': track_data.get('play_count'),
 169             'like_count': track_data.get('favorite_count'),
 170             'repost_count': track_data.get('repost_count'),
 171         }
 172
 173
 174 class AudiusTrackIE(AudiusIE):
 175     _VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''
 176     IE_NAME = 'audius:track'
 177     IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'
 178     _TESTS = [
 179         {
 180             'url': 'audius:9RWlo',
 181             'only_matching': True
 182         },
 183         {
 184             'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
 185             'only_matching': True
 186         },
 187     ]
 188
 189
 190 class AudiusPlaylistIE(AudiusBaseIE):
 191     _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<uploader>[\w\d-]+)/(?:album|playlist)/(?P<title>\S+)'
 192     IE_NAME = 'audius:playlist'
 193     IE_DESC = 'Audius.co playlists'
 194     _TEST = {
 195         'url': 'https://audius.co/test_acc/playlist/test-playlist-22910',
 196         'info_dict': {
 197             'id': 'DNvjN',
 198             'title': 'test playlist',
 199             'description': 'Test description\n\nlol',
 200         },
 201         'playlist_count': 175,
 202     }
 203
 204     def _build_playlist(self, tracks):
 205         entries = []
 206         for track in tracks:
 207             if not isinstance(track, dict):
 208                 raise ExtractorError('Unexpected API response')
 209             track_id = str_or_none(track.get('id'))
 210             if not track_id:
 211                 raise ExtractorError('Unable to get track ID from playlist')
 212             entries.append(self.url_result(
 213                 'audius:%s' % track_id,
 214                 ie=AudiusTrackIE.ie_key(), video_id=track_id))
 215         return entries
 216
 217     def _real_extract(self, url):
 218         self._select_api_base()
 219         mobj = self._match_valid_url(url)
 220         title = mobj.group('title')
 221         # uploader = mobj.group('uploader')
 222         url = self._prepare_url(url, title)
 223         playlist_response = self._resolve_url(url, title)
 224
 225         if not isinstance(playlist_response, list) or len(playlist_response) != 1:
 226             raise ExtractorError('Unexpected API response')
 227
 228         playlist_data = playlist_response[0]
 229         if not isinstance(playlist_data, dict):
 230             raise ExtractorError('Unexpected API response')
 231
 232         playlist_id = playlist_data.get('id')
 233         if playlist_id is None:
 234             raise ExtractorError('Unable to get playlist ID')
 235
 236         playlist_tracks = self._api_request(
 237             '/playlists/%s/tracks' % playlist_id,
 238             title, note='Downloading playlist tracks metadata',
 239             errnote='Unable to download playlist tracks metadata')
 240         if not isinstance(playlist_tracks, list):
 241             raise ExtractorError('Unexpected API response')
 242
 243         entries = self._build_playlist(playlist_tracks)
 244         return self.playlist_result(entries, playlist_id,
 245                                     playlist_data.get('playlist_name', title),
 246                                     playlist_data.get('description'))
 247
 248
 249 class AudiusProfileIE(AudiusPlaylistIE):
 250     IE_NAME = 'audius:artist'
 251     IE_DESC = 'Audius.co profile/artist pages'
 252     _VALID_URL = r'https?://(?:www)?audius\.co/(?P<id>[^\/]+)/?(?:[?#]|$)'
 253     _TEST = {
 254         'url': 'https://audius.co/pzl/',
 255         'info_dict': {
 256             'id': 'ezRo7',
 257             'description': 'TAMALE\n\nContact: officialpzl@gmail.com',
 258             'title': 'pzl',
 259         },
 260         'playlist_count': 24,
 261     }
 262
 263     def _real_extract(self, url):
 264         self._select_api_base()
 265         profile_id = self._match_id(url)
 266         try:
 267             _profile_data = self._api_request('/full/users/handle/' + profile_id, profile_id)
 268         except ExtractorError as e:
 269             raise ExtractorError('Could not download profile info; ' + str(e))
 270         profile_audius_id = _profile_data[0]['id']
 271         profile_bio = _profile_data[0].get('bio')
 272
 273         api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
 274         return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)