[yt-dlp.git] / yt_dlp / extractor / musicdex.py

from .common import InfoExtractor
from ..utils import (
    date_from_str,
    format_field,
    try_get,
    unified_strdate,
)


class MusicdexBaseIE(InfoExtractor):
    """Common base for the musicdex.org extractors.

    Holds the conversion from the site's track/album JSON records to an info
    dict, which the song, album, artist and playlist extractors all reuse.
    """

    @staticmethod
    def _collect_names(items):
        """Return the ``name`` of every dict in *items*, skipping unusable ones.

        *items* may be ``None`` or empty, in which case an empty list is
        returned. Elements that are not dicts, or whose ``name`` is missing or
        empty, are dropped so that the result never contains ``None``.
        """
        return [
            item['name'] for item in items or []
            if isinstance(item, dict) and item.get('name')
        ]

    def _return_info(self, track_json, album_json, id):
        """Build the info dict for a single track.

        track_json -- dict describing the track
        album_json -- dict describing the track's album; callers pass an
                      empty dict when no album record is available
        id         -- track identifier; stored in the result as a string

        Returns a dict with the track metadata. Album-level fields
        (thumbnail, album, album_artists, release_year) are read from
        album_json, everything else from track_json.
        """
        return {
            'id': str(id),
            'title': track_json.get('name'),
            'track': track_json.get('name'),
            'description': track_json.get('description'),
            'track_number': track_json.get('number'),
            'url': format_field(track_json, 'url', 'https://www.musicdex.org/%s'),
            # NOTE(review): the song test fixture expects 266000 here for a
            # single track, which looks like milliseconds rather than
            # seconds -- confirm the unit the API reports before relying on it.
            'duration': track_json.get('duration'),
            'genres': self._collect_names(track_json.get('genres')),
            'like_count': track_json.get('likes_count'),
            'view_count': track_json.get('plays'),
            'artists': self._collect_names(track_json.get('artists')),
            'album_artists': self._collect_names(album_json.get('artists')),
            'thumbnail': format_field(album_json, 'image', 'https://www.musicdex.org/%s'),
            'album': album_json.get('name'),
            'release_year': try_get(album_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
            # Every track is attributed to the song extractor, whichever
            # extractor (song, album, artist, playlist) produced it.
            'extractor_key': MusicdexSongIE.ie_key(),
            'extractor': 'MusicdexSong',
        }


class MusicdexSongIE(MusicdexBaseIE):
    """Extractor for a single musicdex.org track page."""

    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/track/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.musicdex.org/track/306/dual-existence',
        'info_dict': {
            'id': '306',
            'ext': 'mp3',
            'title': 'dual existence',
            'description': '#NIPPONSEI @ IRC.RIZON.NET',
            'track': 'dual existence',
            'track_number': 1,
            'duration': 266000,
            'genres': ['Anime'],
            'like_count': int,
            'view_count': int,
            'artists': ['fripSide'],
            'album_artists': ['fripSide'],
            'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png',
            'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence',
            'release_year': 2020,
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        """Download the track record named in *url* and return its info dict."""
        track_id = self._match_id(url)
        api_url = f'https://www.musicdex.org/secure/tracks/{track_id}?defaultRelations=true'
        track = self._download_json(api_url, track_id)['track']
        # The album is nested inside the track record; fall back to an empty
        # dict so the album-level lookups simply come back empty.
        album = track.get('album') or {}
        return self._return_info(track, album, track_id)


class MusicdexAlbumIE(MusicdexBaseIE):
    """Extractor for a musicdex.org album page; yields the album as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/album/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.musicdex.org/album/56/tenmon-and-eiichiro-yanagi-minori/ef-a-tale-of-memories-original-soundtrack-2-fortissimo',
        'playlist_mincount': 28,
        'info_dict': {
            'id': '56',
            'genres': ['OST'],
            'view_count': int,
            'artists': ['TENMON & Eiichiro Yanagi / minori'],
            'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~',
            'release_year': 2008,
            'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg',
        },
    }]

    def _real_extract(self, url):
        """Download the album record named in *url* and return a playlist dict.

        The playlist carries the album-level metadata and one entry per track
        in the album record that has an ``id``.
        """
        album_id = self._match_id(url)
        album = self._download_json(
            f'https://www.musicdex.org/secure/albums/{album_id}?defaultRelations=true', album_id)['album']

        def names(items):
            # Keep only usable names: a record without a 'name' would
            # otherwise put None into the list, and a non-dict element would
            # raise on .get().
            return [
                item['name'] for item in items or []
                if isinstance(item, dict) and item.get('name')
            ]

        entries = [
            self._return_info(track, album, track['id'])
            for track in album.get('tracks') or []
            if track.get('id')  # tracks without an id are skipped
        ]

        return {
            '_type': 'playlist',
            'id': album_id,
            'title': album.get('name'),
            'description': album.get('description'),
            'genres': names(album.get('genres')),
            'view_count': album.get('plays'),
            'artists': names(album.get('artists')),
            'thumbnail': format_field(album, 'image', 'https://www.musicdex.org/%s'),
            'release_year': try_get(album, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
            'entries': entries,
        }


class MusicdexPageIE(MusicdexBaseIE):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
    """Base for extractors that read a paginated musicdex.org API listing.

    Subclasses set ``_API_URL`` to the first-page URL, containing one ``%s``
    placeholder that is filled with the id.
    """

    def _entries(self, id):
        """Yield every item of the paginated listing for *id*.

        Each response's ``pagination`` object supplies the items under
        ``data`` and the following page under ``next_page_url``; iteration
        stops when no next page URL is given.
        """
        page_url = self._API_URL % id
        while page_url:
            pagination = self._download_json(page_url, id)['pagination']
            yield from pagination.get('data') or []
            page_url = pagination.get('next_page_url')


class MusicdexArtistIE(MusicdexPageIE):
    """Extractor for a musicdex.org artist page; yields all album tracks as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/artist/(?P<id>\d+)'
    _API_URL = 'https://www.musicdex.org/secure/artists/%s/albums?page=1'

    _TESTS = [{
        'url': 'https://www.musicdex.org/artist/11/fripside',
        'playlist_mincount': 28,
        'info_dict': {
            'id': '11',
            'view_count': int,
            'title': 'fripSide',
            'thumbnail': 'https://www.musicdex.org/storage/artist/ZmOz0lN2vsweegB660em3xWffCjLPmTQHqJls5Xx.jpg',
        },
    }]

    def _real_extract(self, url):
        """Download the artist record and its albums, returning a playlist dict.

        The paginated listing yields albums; each track with an ``id`` inside
        those albums becomes one playlist entry.
        """
        artist_id = self._match_id(url)
        artist = self._download_json(
            f'https://www.musicdex.org/secure/artists/{artist_id}', artist_id)['artist']

        # Flatten albums -> tracks, pairing every track with its own album.
        entries = [
            self._return_info(track, album, track['id'])
            for album in self._entries(artist_id)
            for track in album.get('tracks') or []
            if track.get('id')
        ]

        return {
            '_type': 'playlist',
            'id': artist_id,
            'title': artist.get('name'),
            'view_count': artist.get('plays'),
            'thumbnail': format_field(artist, 'image_small', 'https://www.musicdex.org/%s'),
            'entries': entries,
        }


class MusicdexPlaylistIE(MusicdexPageIE):
    """Extractor for a musicdex.org user playlist page."""

    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/playlist/(?P<id>\d+)'
    _API_URL = 'https://www.musicdex.org/secure/playlists/%s/tracks?perPage=10000&page=1'

    _TESTS = [{
        'url': 'https://www.musicdex.org/playlist/9/test',
        'playlist_mincount': 73,
        'info_dict': {
            'id': '9',
            'view_count': int,
            'title': 'Test',
            'thumbnail': 'https://www.musicdex.org/storage/album/jXATI79f0IbQ2sgsKYOYRCW3zRwF3XsfHhzITCuJ.jpg',
            'description': 'Test 123 123 21312 32121321321321312',
        },
    }]

    def _real_extract(self, url):
        """Download the playlist record and its tracks, returning a playlist dict.

        The paginated listing yields tracks directly; each track with an
        ``id`` becomes one entry, using the album nested in the track record.
        """
        playlist_id = self._match_id(url)
        playlist = self._download_json(
            f'https://www.musicdex.org/secure/playlists/{playlist_id}', playlist_id)['playlist']

        entries = []
        # _entries() is a generator, so it is iterated directly; there is no
        # empty/None result to guard against.
        for track in self._entries(playlist_id):
            if not track.get('id'):
                continue
            album = track.get('album') or {}
            entries.append(self._return_info(track, album, track['id']))

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist.get('name'),
            'description': playlist.get('description'),
            'view_count': playlist.get('plays'),
            'thumbnail': format_field(playlist, 'image', 'https://www.musicdex.org/%s'),
            'entries': entries,
        }
Commit	Line	Data
fb62afd6 AG	1	from .common import InfoExtractor
	2	from ..utils import (
	3	date_from_str,
	4	format_field,
	5	try_get,
	6	unified_strdate,
	7	)
	8
	9
	10	class MusicdexBaseIE(InfoExtractor):
	11	def _return_info(self, track_json, album_json, id):
	12	return {
	13	'id': str(id),
	14	'title': track_json.get('name'),
	15	'track': track_json.get('name'),
	16	'description': track_json.get('description'),
	17	'track_number': track_json.get('number'),
	18	'url': format_field(track_json, 'url', 'https://www.musicdex.org/%s'),
	19	'duration': track_json.get('duration'),
f4f9f6d0	20	'genres': [genre.get('name') for genre in track_json.get('genres') or []],
fb62afd6 AG	21	'like_count': track_json.get('likes_count'),
fb62afd6 AG	22	'view_count': track_json.get('plays'),
f4f9f6d0	23	'artists': [artist.get('name') for artist in track_json.get('artists') or []],
f4f9f6d0	24	'album_artists': [artist.get('name') for artist in album_json.get('artists') or []],
fb62afd6 AG	25	'thumbnail': format_field(album_json, 'image', 'https://www.musicdex.org/%s'),
	26	'album': album_json.get('name'),
	27	'release_year': try_get(album_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
	28	'extractor_key': MusicdexSongIE.ie_key(),
	29	'extractor': 'MusicdexSong',
	30	}
	31
	32
	33	class MusicdexSongIE(MusicdexBaseIE):
	34	_VALID_URL = r'https?://(?:www\.)?musicdex\.org/track/(?P<id>\d+)'
	35
	36	_TESTS = [{
	37	'url': 'https://www.musicdex.org/track/306/dual-existence',
	38	'info_dict': {
	39	'id': '306',
	40	'ext': 'mp3',
	41	'title': 'dual existence',
	42	'description': '#NIPPONSEI @ IRC.RIZON.NET',
	43	'track': 'dual existence',
	44	'track_number': 1,
	45	'duration': 266000,
f4f9f6d0	46	'genres': ['Anime'],
fb62afd6 AG	47	'like_count': int,
fb62afd6 AG	48	'view_count': int,
f4f9f6d0	49	'artists': ['fripSide'],
f4f9f6d0	50	'album_artists': ['fripSide'],
fb62afd6 AG	51	'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png',
	52	'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence',
	53	'release_year': 2020
	54	},
	55	'params': {'skip_download': True}
	56	}]
	57
	58	def _real_extract(self, url):
	59	id = self._match_id(url)
	60	data_json = self._download_json(f'https://www.musicdex.org/secure/tracks/{id}?defaultRelations=true', id)['track']
	61	return self._return_info(data_json, data_json.get('album') or {}, id)
	62
	63
	64	class MusicdexAlbumIE(MusicdexBaseIE):
	65	_VALID_URL = r'https?://(?:www\.)?musicdex\.org/album/(?P<id>\d+)'
	66
	67	_TESTS = [{
	68	'url': 'https://www.musicdex.org/album/56/tenmon-and-eiichiro-yanagi-minori/ef-a-tale-of-memories-original-soundtrack-2-fortissimo',
	69	'playlist_mincount': 28,
	70	'info_dict': {
	71	'id': '56',
f4f9f6d0	72	'genres': ['OST'],
fb62afd6	73	'view_count': int,
f4f9f6d0	74	'artists': ['TENMON & Eiichiro Yanagi / minori'],
fb62afd6 AG	75	'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~',
	76	'release_year': 2008,
	77	'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg',
	78	},
	79	}]
	80
	81	def _real_extract(self, url):
	82	id = self._match_id(url)
	83	data_json = self._download_json(f'https://www.musicdex.org/secure/albums/{id}?defaultRelations=true', id)['album']
	84	entries = [self._return_info(track, data_json, track['id']) for track in data_json.get('tracks') or [] if track.get('id')]
	85
	86	return {
	87	'_type': 'playlist',
	88	'id': id,
	89	'title': data_json.get('name'),
	90	'description': data_json.get('description'),
f4f9f6d0	91	'genres': [genre.get('name') for genre in data_json.get('genres') or []],
fb62afd6	92	'view_count': data_json.get('plays'),
f4f9f6d0	93	'artists': [artist.get('name') for artist in data_json.get('artists') or []],
fb62afd6 AG	94	'thumbnail': format_field(data_json, 'image', 'https://www.musicdex.org/%s'),
	95	'release_year': try_get(data_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
	96	'entries': entries,
	97	}
	98
	99
6368e2e6	100	class MusicdexPageIE(MusicdexBaseIE): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
fb62afd6 AG	101	def _entries(self, id):
	102	next_page_url = self._API_URL % id
	103	while next_page_url:
	104	data_json = self._download_json(next_page_url, id)['pagination']
	105	for data in data_json.get('data') or []:
	106	yield data
	107	next_page_url = data_json.get('next_page_url')
	108
	109
	110	class MusicdexArtistIE(MusicdexPageIE):
	111	_VALID_URL = r'https?://(?:www\.)?musicdex\.org/artist/(?P<id>\d+)'
	112	_API_URL = 'https://www.musicdex.org/secure/artists/%s/albums?page=1'
	113
	114	_TESTS = [{
	115	'url': 'https://www.musicdex.org/artist/11/fripside',
	116	'playlist_mincount': 28,
	117	'info_dict': {
	118	'id': '11',
	119	'view_count': int,
	120	'title': 'fripSide',
	121	'thumbnail': 'https://www.musicdex.org/storage/artist/ZmOz0lN2vsweegB660em3xWffCjLPmTQHqJls5Xx.jpg',
	122	},
	123	}]
	124
	125	def _real_extract(self, url):
	126	id = self._match_id(url)
	127	data_json = self._download_json(f'https://www.musicdex.org/secure/artists/{id}', id)['artist']
	128	entries = []
	129	for album in self._entries(id):
	130	entries.extend(self._return_info(track, album, track['id']) for track in album.get('tracks') or [] if track.get('id'))
	131
	132	return {
	133	'_type': 'playlist',
	134	'id': id,
	135	'title': data_json.get('name'),
	136	'view_count': data_json.get('plays'),
	137	'thumbnail': format_field(data_json, 'image_small', 'https://www.musicdex.org/%s'),
	138	'entries': entries,
	139	}
	140
	141
	142	class MusicdexPlaylistIE(MusicdexPageIE):
	143	_VALID_URL = r'https?://(?:www\.)?musicdex\.org/playlist/(?P<id>\d+)'
	144	_API_URL = 'https://www.musicdex.org/secure/playlists/%s/tracks?perPage=10000&page=1'
	145
	146	_TESTS = [{
	147	'url': 'https://www.musicdex.org/playlist/9/test',
	148	'playlist_mincount': 73,
	149	'info_dict': {
	150	'id': '9',
	151	'view_count': int,
	152	'title': 'Test',
	153	'thumbnail': 'https://www.musicdex.org/storage/album/jXATI79f0IbQ2sgsKYOYRCW3zRwF3XsfHhzITCuJ.jpg',
	154	'description': 'Test 123 123 21312 32121321321321312',
	155	},
	156	}]
	157
	158	def _real_extract(self, url):
	159	id = self._match_id(url)
	160	data_json = self._download_json(f'https://www.musicdex.org/secure/playlists/{id}', id)['playlist']
	161	entries = [self._return_info(track, track.get('album') or {}, track['id'])
	162	for track in self._entries(id) or [] if track.get('id')]
	163
	164	return {
165	'_type': 'playlist',
166	'id': id,
167	'title': data_json.get('name'),
168	'description': data_json.get('description'),
169	'view_count': data_json.get('plays'),
170	'thumbnail': format_field(data_json, 'image', 'https://www.musicdex.org/%s'),
171	'entries': entries,
172	}