[yt-dlp.git] / yt_dlp / extractor / audius.py

import random

from .common import InfoExtractor
from ..compat import compat_str, compat_urllib_parse_unquote
from ..utils import ExtractorError, str_or_none, try_get


class AudiusBaseIE(InfoExtractor):
    """Shared plumbing for the Audius extractors.

    Provides API host selection, a thin wrapper around JSON API requests,
    unwrapping of the API response envelope and URL normalisation.
    """

    # Base URL of the API host picked by _select_api_base(); None until then
    _API_BASE = None
    # Path prefix inserted between the API host and every endpoint path
    _API_V = '/v1'

    def _get_response_data(self, response):
        """Return the 'data' member of a decoded API response.

        Raises an expected ExtractorError carrying the API's own text when
        the response is a dict whose only key is 'message', and a generic
        ExtractorError for any other response without usable 'data'.
        """
        if isinstance(response, dict):
            response_data = response.get('data')
            if response_data is not None:
                return response_data
            if len(response) == 1 and 'message' in response:
                raise ExtractorError('API error: %s' % response['message'],
                                     expected=True)
        raise ExtractorError('Unexpected API response')

    def _select_api_base(self):
        """Pick one of the currently available API hosts at random.

        Stores the chosen host in self._API_BASE. Raises ExtractorError when
        the host list is missing, malformed or empty.
        """
        response = super(AudiusBaseIE, self)._download_json(
            'https://api.audius.co/', None,
            note='Requesting available API hosts',
            errnote='Unable to request available API hosts')
        hosts = self._get_response_data(response)
        # random.choice() raises IndexError on an empty sequence, so an empty
        # host list is rejected the same way as a malformed one
        if isinstance(hosts, list) and hosts:
            self._API_BASE = random.choice(hosts)
            return
        raise ExtractorError('Unable to get available API hosts')

    @staticmethod
    def _prepare_url(url, title):
        """Return the unquoted url with slashes in its title part escaped.

        Audius removes forward slashes from the URI but leaves backslashes.
        Chrome replaces backslashes in the address bar with forward slashes,
        so a link copied from there cannot be resolved by the Audius API.
        Every '/' (or literal '%2F') inside the title is therefore turned
        back into an encoded backslash ('%5C').

        url and title are both percent-decoded first; title is expected to
        be the title portion of url.
        """
        url = compat_urllib_parse_unquote(url)
        title = compat_urllib_parse_unquote(title)
        if '/' in title or '%2F' in title:
            fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
            # Rewrite only the trailing title: a blanket str.replace() would
            # also rewrite an earlier part of the URL (e.g. the uploader
            # segment) that happens to contain the same text
            if url.endswith(title):
                return url[:len(url) - len(title)] + fixed_title
            return url.replace(title, fixed_title)
        return url

    def _api_request(self, path, item_id=None, note='Downloading JSON metadata',
                     errnote='Unable to download JSON metadata',
                     expected_status=None):
        """Request an API endpoint and return the unwrapped 'data' payload.

        path is appended to the selected API host and version prefix; a host
        is selected first if none has been chosen yet. The remaining
        arguments are passed through to _download_json().

        Raises an expected ExtractorError when the host answers with
        something that cannot be parsed as JSON; other ExtractorErrors are
        propagated unchanged.
        """
        if self._API_BASE is None:
            self._select_api_base()
        try:
            response = super(AudiusBaseIE, self)._download_json(
                '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
                errnote=errnote, expected_status=expected_status)
        except ExtractorError as exc:
            # some of Audius API hosts may not work as expected and return HTML
            if 'Failed to parse JSON' in compat_str(exc):
                raise ExtractorError('An error occurred while receiving data. Try again',
                                     expected=True)
            # bare raise keeps the original traceback intact
            raise
        return self._get_response_data(response)

    def _resolve_url(self, url, item_id):
        """Ask the API's /resolve endpoint which resource a page URL maps to.

        404 is passed as expected_status - presumably so the API's JSON error
        body can still be read and reported by _get_response_data(); confirm
        against _download_json().
        """
        return self._api_request('/resolve?url=%s' % url, item_id,
                                 expected_status=404)


class AudiusIE(AudiusBaseIE):
    """Extractor for single Audius tracks addressed by their page URL.

    _real_extract() also handles matches that carry a 'track_id' group, so
    subclasses can reuse it for direct track IDs / API links.
    """
    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:audius\.co/(?P<uploader>[\w\d-]+)(?!/album|/playlist)/(?P<title>\S+))'''
    IE_DESC = 'Audius.co'
    _TESTS = [
        {
            # URL from Chrome address bar which replace backslash to forward slash
            'url': 'https://audius.co/test_acc/t%D0%B5%D0%B5%D0%B5est-1.%5E_%7B%7D/%22%3C%3E.%E2%84%96~%60-198631',
            'md5': '92c35d3e754d5a0f17eef396b0d33582',
            'info_dict': {
                'id': 'xd8gY',
                'title': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
                'ext': 'mp3',
                'description': 'Description',
                'duration': 30,
                'track': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
                'artist': 'test',
                'genre': 'Electronic',
                'thumbnail': r're:https?://.*\.jpg',
                'view_count': int,
                'like_count': int,
                'repost_count': int,
            }
        },
        {
            # Regular track
            'url': 'https://audius.co/voltra/radar-103692',
            'md5': '491898a0a8de39f20c5d6a8a80ab5132',
            'info_dict': {
                'id': 'KKdy2',
                'title': 'RADAR',
                'ext': 'mp3',
                'duration': 318,
                'track': 'RADAR',
                'artist': 'voltra',
                'genre': 'Trance',
                'thumbnail': r're:https?://.*\.jpg',
                'view_count': int,
                'like_count': int,
                'repost_count': int,
            }
        },
    ]

    # Artwork size keys used by the API mapped to a thumbnail preference
    # (larger artwork gets the higher value)
    _ARTWORK_MAP = {
        "150x150": 150,
        "480x480": 480,
        "1000x1000": 1000
    }

    def _real_extract(self, url):
        """Build the info dict for one track.

        The track is looked up directly by ID when the matched URL provides a
        'track_id' group, and through the API's resolve endpoint otherwise.
        Raises ExtractorError when the API answer is not a dict or has no ID.
        """
        mobj = self._match_valid_url(url)
        # This class' own pattern has no 'track_id' group; try_get turns the
        # failed group lookup into None
        track_id = try_get(mobj, lambda x: x.group('track_id'))
        if track_id is None:
            title = mobj.group('title')
            url = self._prepare_url(url, title)
            track_data = self._resolve_url(url, title)
        else:  # API link
            title = None
            track_data = self._api_request('/tracks/%s' % track_id, track_id)

        if not isinstance(track_data, dict):
            raise ExtractorError('Unexpected API response')

        track_id = track_data.get('id')
        if track_id is None:
            raise ExtractorError('Unable to get ID of the track')

        artworks_data = track_data.get('artwork')
        thumbnails = []
        if isinstance(artworks_data, dict):
            for quality_key, thumbnail_url in artworks_data.items():
                # an artwork entry without a URL is of no use as a thumbnail
                if not thumbnail_url:
                    continue
                thumbnail = {
                    "url": thumbnail_url
                }
                quality_code = self._ARTWORK_MAP.get(quality_key)
                if quality_code is not None:
                    thumbnail['preference'] = quality_code
                thumbnails.append(thumbnail)

        return {
            'id': track_id,
            # fall back to the title from the URL when the API sends no title
            # or an explicit null
            'title': track_data.get('title') or title,
            # use the shared version prefix instead of a second hard-coded '/v1'
            'url': '%s%s/tracks/%s/stream' % (self._API_BASE, self._API_V, track_id),
            'ext': 'mp3',
            'description': track_data.get('description'),
            'duration': track_data.get('duration'),
            'track': track_data.get('title'),
            'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
            'genre': track_data.get('genre'),
            'thumbnails': thumbnails,
            'view_count': track_data.get('play_count'),
            'like_count': track_data.get('favorite_count'),
            'repost_count': track_data.get('repost_count'),
        }


class AudiusTrackIE(AudiusIE):
    """Extractor for "audius:"-prefixed track IDs and API track links.

    Only the URL pattern and descriptive metadata are defined here; the
    extraction logic is inherited from AudiusIE, whose _real_extract() looks
    for the 'track_id' group this pattern captures.
    """
    IE_NAME = 'audius:track'
    IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'
    # Either a bare ID after "audius:" or a full ".../v1/tracks/<id>" link
    _VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''
    _TESTS = [
        {'url': 'audius:9RWlo', 'only_matching': True},
        {
            'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
            'only_matching': True,
        },
    ]


class AudiusPlaylistIE(AudiusBaseIE):
    """Extractor for Audius album and playlist pages."""
    _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<uploader>[\w\d-]+)/(?:album|playlist)/(?P<title>\S+)'
    IE_NAME = 'audius:playlist'
    IE_DESC = 'Audius.co playlists'
    _TEST = {
        'url': 'https://audius.co/test_acc/playlist/test-playlist-22910',
        'info_dict': {
            'id': 'DNvjN',
            'title': 'test playlist',
            'description': 'Test description\n\nlol',
        },
        'playlist_count': 175,
    }

    def _build_playlist(self, tracks):
        """Turn an iterable of track dicts into a list of url_result entries.

        Each entry points at "audius:<id>" and is delegated to AudiusTrackIE.
        Raises ExtractorError on the first item that is not a dict or that
        has no usable ID.
        """
        def as_entry(item):
            if not isinstance(item, dict):
                raise ExtractorError('Unexpected API response')
            entry_id = str_or_none(item.get('id'))
            if not entry_id:
                raise ExtractorError('Unable to get track ID from playlist')
            return self.url_result(
                'audius:%s' % entry_id,
                ie=AudiusTrackIE.ie_key(), video_id=entry_id)

        return [as_entry(item) for item in tracks]

    def _real_extract(self, url):
        """Resolve the playlist page, fetch its tracks and return a playlist.

        Raises ExtractorError when the resolve answer is not a one-element
        list holding a dict with an ID, or when the track listing is not a
        list.
        """
        self._select_api_base()
        title = self._match_valid_url(url).group('title')
        resolved = self._resolve_url(self._prepare_url(url, title), title)

        # the resolve endpoint is expected to answer with exactly one playlist
        if not (isinstance(resolved, list) and len(resolved) == 1):
            raise ExtractorError('Unexpected API response')

        playlist_data = resolved[0]
        if not isinstance(playlist_data, dict):
            raise ExtractorError('Unexpected API response')

        playlist_id = playlist_data.get('id')
        if playlist_id is None:
            raise ExtractorError('Unable to get playlist ID')

        track_list = self._api_request(
            '/playlists/%s/tracks' % playlist_id, title,
            note='Downloading playlist tracks metadata',
            errnote='Unable to download playlist tracks metadata')
        if not isinstance(track_list, list):
            raise ExtractorError('Unexpected API response')

        return self.playlist_result(
            self._build_playlist(track_list), playlist_id,
            playlist_data.get('playlist_name', title),
            playlist_data.get('description'))


class AudiusProfileIE(AudiusPlaylistIE):
    """Extractor for Audius profile/artist pages, returned as a playlist of
    the profile's tracks."""
    IE_NAME = 'audius:artist'
    IE_DESC = 'Audius.co profile/artist pages'
    # The dot after "www" must be part of the optional group; without it the
    # pattern rejects www.audius.co and accepts "wwwaudius.co" instead
    _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<id>[^\/]+)/?(?:[?#]|$)'
    _TEST = {
        'url': 'https://audius.co/pzl/',
        'info_dict': {
            'id': 'ezRo7',
            'description': 'TAMALE\n\nContact: officialpzl@gmail.com',
            'title': 'pzl',
        },
        'playlist_count': 24,
    }

    def _real_extract(self, url):
        """Look up the profile by its handle and list its tracks.

        The playlist uses the profile's Audius ID as its ID, the handle from
        the URL as its title and the profile bio as its description.
        Raises ExtractorError when the profile cannot be downloaded or when
        either API answer does not have the expected shape.
        """
        self._select_api_base()
        profile_id = self._match_id(url)
        try:
            _profile_data = self._api_request('/full/users/handle/' + profile_id, profile_id)
        except ExtractorError as e:
            # chain the original error so its traceback is not lost
            raise ExtractorError('Could not download profile info; ' + str(e)) from e

        # Validate the shape before indexing so a changed or empty answer
        # surfaces as an ExtractorError rather than a raw
        # IndexError/KeyError/TypeError
        if not isinstance(_profile_data, list) or not _profile_data:
            raise ExtractorError('Unexpected API response')
        profile = _profile_data[0]
        if not isinstance(profile, dict):
            raise ExtractorError('Unexpected API response')
        profile_audius_id = profile.get('id')
        if profile_audius_id is None:
            raise ExtractorError('Unable to get profile ID')
        profile_bio = profile.get('bio')

        api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
        if not isinstance(api_call, list):
            raise ExtractorError('Unexpected API response')
        return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)
Commit	Line	Data
caa15a7b	1	import random
caa15a7b	2
caa15a7b	3	from .common import InfoExtractor
14f25df2	4	from ..compat import compat_str, compat_urllib_parse_unquote
14f25df2	5	from ..utils import ExtractorError, str_or_none, try_get
caa15a7b	6
	7
	8	class AudiusBaseIE(InfoExtractor):
	9	_API_BASE = None
	10	_API_V = '/v1'
	11
	12	def _get_response_data(self, response):
	13	if isinstance(response, dict):
	14	response_data = response.get('data')
	15	if response_data is not None:
	16	return response_data
	17	if len(response) == 1 and 'message' in response:
	18	raise ExtractorError('API error: %s' % response['message'],
	19	expected=True)
	20	raise ExtractorError('Unexpected API response')
	21
	22	def _select_api_base(self):
	23	"""Selecting one of the currently available API hosts"""
	24	response = super(AudiusBaseIE, self)._download_json(
	25	'https://api.audius.co/', None,
	26	note='Requesting available API hosts',
	27	errnote='Unable to request available API hosts')
	28	hosts = self._get_response_data(response)
	29	if isinstance(hosts, list):
	30	self._API_BASE = random.choice(hosts)
	31	return
	32	raise ExtractorError('Unable to get available API hosts')
	33
	34	@staticmethod
	35	def _prepare_url(url, title):
	36	"""
	37	Audius removes forward slashes from the uri, but leaves backslashes.
	38	The problem is that the current version of Chrome replaces backslashes
	39	in the address bar with a forward slashes, so if you copy the link from
	40	there and paste it into youtube-dl, you won't be able to download
	41	anything from this link, since the Audius API won't be able to resolve
	42	this url
	43	"""
	44	url = compat_urllib_parse_unquote(url)
	45	title = compat_urllib_parse_unquote(title)
	46	if '/' in title or '%2F' in title:
	47	fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
	48	return url.replace(title, fixed_title)
	49	return url
	50
	51	def _api_request(self, path, item_id=None, note='Downloading JSON metadata',
	52	errnote='Unable to download JSON metadata',
	53	expected_status=None):
	54	if self._API_BASE is None:
	55	self._select_api_base()
	56	try:
	57	response = super(AudiusBaseIE, self)._download_json(
	58	'%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
	59	errnote=errnote, expected_status=expected_status)
	60	except ExtractorError as exc:
	61	# some of Audius API hosts may not work as expected and return HTML
	62	if 'Failed to parse JSON' in compat_str(exc):
	63	raise ExtractorError('An error occurred while receiving data. Try again',
	64	expected=True)
	65	raise exc
	66	return self._get_response_data(response)
	67
	68	def _resolve_url(self, url, item_id):
	69	return self._api_request('/resolve?url=%s' % url, item_id,
70	expected_status=404)
71
72
73	class AudiusIE(AudiusBaseIE):
74	_VALID_URL = r'''(?x)https?://(?:www\.)?(?:audius\.co/(?P<uploader>[\w\d-]+)(?!/album\|/playlist)/(?P<title>\S+))'''
c55256c5	75	IE_DESC = 'Audius.co'
caa15a7b	76	_TESTS = [
	77	{
	78	# URL from Chrome address bar which replace backslash to forward slash
	79	'url': 'https://audius.co/test_acc/t%D0%B5%D0%B5%D0%B5est-1.%5E_%7B%7D/%22%3C%3E.%E2%84%96~%60-198631',
	80	'md5': '92c35d3e754d5a0f17eef396b0d33582',
	81	'info_dict': {
	82	'id': 'xd8gY',
	83	'title': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
	84	'ext': 'mp3',
	85	'description': 'Description',
	86	'duration': 30,
	87	'track': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
	88	'artist': 'test',
	89	'genre': 'Electronic',
	90	'thumbnail': r're:https?://.*\.jpg',
	91	'view_count': int,
	92	'like_count': int,
	93	'repost_count': int,
	94	}
	95	},
	96	{
	97	# Regular track
	98	'url': 'https://audius.co/voltra/radar-103692',
	99	'md5': '491898a0a8de39f20c5d6a8a80ab5132',
	100	'info_dict': {
	101	'id': 'KKdy2',
	102	'title': 'RADAR',
	103	'ext': 'mp3',
	104	'duration': 318,
	105	'track': 'RADAR',
	106	'artist': 'voltra',
	107	'genre': 'Trance',
	108	'thumbnail': r're:https?://.*\.jpg',
	109	'view_count': int,
	110	'like_count': int,
	111	'repost_count': int,
	112	}
	113	},
	114	]
	115
	116	_ARTWORK_MAP = {
	117	"150x150": 150,
	118	"480x480": 480,
	119	"1000x1000": 1000
	120	}
	121
	122	def _real_extract(self, url):
5ad28e7f	123	mobj = self._match_valid_url(url)
c55256c5	124	track_id = try_get(mobj, lambda x: x.group('track_id'))
caa15a7b	125	if track_id is None:
c55256c5	126	title = mobj.group('title')
c55256c5	127	# uploader = mobj.group('uploader')
caa15a7b	128	url = self._prepare_url(url, title)
	129	track_data = self._resolve_url(url, title)
	130	else: # API link
c55256c5	131	title = None
c55256c5	132	# uploader = None
caa15a7b	133	track_data = self._api_request('/tracks/%s' % track_id, track_id)
	134
	135	if not isinstance(track_data, dict):
	136	raise ExtractorError('Unexpected API response')
	137
	138	track_id = track_data.get('id')
	139	if track_id is None:
	140	raise ExtractorError('Unable to get ID of the track')
	141
	142	artworks_data = track_data.get('artwork')
	143	thumbnails = []
	144	if isinstance(artworks_data, dict):
	145	for quality_key, thumbnail_url in artworks_data.items():
	146	thumbnail = {
	147	"url": thumbnail_url
	148	}
	149	quality_code = self._ARTWORK_MAP.get(quality_key)
	150	if quality_code is not None:
	151	thumbnail['preference'] = quality_code
	152	thumbnails.append(thumbnail)
	153
	154	return {
	155	'id': track_id,
	156	'title': track_data.get('title', title),
	157	'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id),
	158	'ext': 'mp3',
	159	'description': track_data.get('description'),
	160	'duration': track_data.get('duration'),
	161	'track': track_data.get('title'),
	162	'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
	163	'genre': track_data.get('genre'),
	164	'thumbnails': thumbnails,
	165	'view_count': track_data.get('play_count'),
	166	'like_count': track_data.get('favorite_count'),
	167	'repost_count': track_data.get('repost_count'),
	168	}
	169
	170
c55256c5	171	class AudiusTrackIE(AudiusIE):
	172	_VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''
	173	IE_NAME = 'audius:track'
	174	IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'
	175	_TESTS = [
	176	{
	177	'url': 'audius:9RWlo',
	178	'only_matching': True
	179	},
	180	{
	181	'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
	182	'only_matching': True
	183	},
	184	]
	185
	186
caa15a7b	187	class AudiusPlaylistIE(AudiusBaseIE):
	188	_VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<uploader>[\w\d-]+)/(?:album\|playlist)/(?P<title>\S+)'
	189	IE_NAME = 'audius:playlist'
c55256c5	190	IE_DESC = 'Audius.co playlists'
caa15a7b	191	_TEST = {
	192	'url': 'https://audius.co/test_acc/playlist/test-playlist-22910',
	193	'info_dict': {
	194	'id': 'DNvjN',
	195	'title': 'test playlist',
	196	'description': 'Test description\n\nlol',
	197	},
	198	'playlist_count': 175,
	199	}
	200
	201	def _build_playlist(self, tracks):
	202	entries = []
	203	for track in tracks:
	204	if not isinstance(track, dict):
	205	raise ExtractorError('Unexpected API response')
	206	track_id = str_or_none(track.get('id'))
	207	if not track_id:
	208	raise ExtractorError('Unable to get track ID from playlist')
	209	entries.append(self.url_result(
c55256c5	210	'audius:%s' % track_id,
c55256c5	211	ie=AudiusTrackIE.ie_key(), video_id=track_id))
caa15a7b	212	return entries
	213
	214	def _real_extract(self, url):
	215	self._select_api_base()
5ad28e7f	216	mobj = self._match_valid_url(url)
c55256c5	217	title = mobj.group('title')
c55256c5	218	# uploader = mobj.group('uploader')
caa15a7b	219	url = self._prepare_url(url, title)
	220	playlist_response = self._resolve_url(url, title)
	221
	222	if not isinstance(playlist_response, list) or len(playlist_response) != 1:
	223	raise ExtractorError('Unexpected API response')
	224
	225	playlist_data = playlist_response[0]
	226	if not isinstance(playlist_data, dict):
	227	raise ExtractorError('Unexpected API response')
	228
	229	playlist_id = playlist_data.get('id')
	230	if playlist_id is None:
	231	raise ExtractorError('Unable to get playlist ID')
	232
	233	playlist_tracks = self._api_request(
	234	'/playlists/%s/tracks' % playlist_id,
	235	title, note='Downloading playlist tracks metadata',
	236	errnote='Unable to download playlist tracks metadata')
	237	if not isinstance(playlist_tracks, list):
	238	raise ExtractorError('Unexpected API response')
	239
	240	entries = self._build_playlist(playlist_tracks)
	241	return self.playlist_result(entries, playlist_id,
	242	playlist_data.get('playlist_name', title),
	243	playlist_data.get('description'))
98784ef8	244
	245
	246	class AudiusProfileIE(AudiusPlaylistIE):
	247	IE_NAME = 'audius:artist'
	248	IE_DESC = 'Audius.co profile/artist pages'
	249	_VALID_URL = r'https?://(?:www)?audius\.co/(?P<id>[^\/]+)/?(?:[?#]\|$)'
	250	_TEST = {
	251	'url': 'https://audius.co/pzl/',
	252	'info_dict': {
	253	'id': 'ezRo7',
	254	'description': 'TAMALE\n\nContact: officialpzl@gmail.com',
	255	'title': 'pzl',
	256	},
	257	'playlist_count': 24,
	258	}
	259
	260	def _real_extract(self, url):
	261	self._select_api_base()
	262	profile_id = self._match_id(url)
	263	try:
	264	_profile_data = self._api_request('/full/users/handle/' + profile_id, profile_id)
	265	except ExtractorError as e:
	266	raise ExtractorError('Could not download profile info; ' + str(e))
	267	profile_audius_id = _profile_data[0]['id']
	268	profile_bio = _profile_data[0].get('bio')
	269
	270	api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
	271	return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)