]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/musicdex.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / musicdex.py
CommitLineData
fb62afd6
AG
1from .common import InfoExtractor
2from ..utils import (
3 date_from_str,
4 format_field,
5 try_get,
6 unified_strdate,
7)
8
9
class MusicdexBaseIE(InfoExtractor):
    # Shared base for the musicdex.org extractors: turns API track/album
    # mappings into a single-track info dict.

    def _return_info(self, track_json, album_json, video_id):
        """Build the info dict for one track.

        track_json supplies the per-track fields, album_json the album-level
        fields (album artists, cover image, release date), and video_id is
        stringified into the resulting ``id``.
        """
        site_template = 'https://www.musicdex.org/%s'

        def names_of(items):
            # A missing or empty list yields an empty list of names
            return [item.get('name') for item in items or []]

        def release_year_of(album):
            return date_from_str(unified_strdate(album['release_date'])).year

        track_name = track_json.get('name')
        return {
            'id': str(video_id),
            'title': track_name,
            'track': track_name,
            'description': track_json.get('description'),
            'track_number': track_json.get('number'),
            'url': format_field(track_json, 'url', site_template),
            # NOTE(review): passed through unchanged from the API; the song test
            # in this file expects 266000, so the unit may not be seconds - TODO confirm
            'duration': track_json.get('duration'),
            'genres': names_of(track_json.get('genres')),
            'like_count': track_json.get('likes_count'),
            'view_count': track_json.get('plays'),
            'artists': names_of(track_json.get('artists')),
            'album_artists': names_of(album_json.get('artists')),
            'thumbnail': format_field(album_json, 'image', site_template),
            'album': album_json.get('name'),
            # try_get wraps the date parsing of album_json['release_date']
            'release_year': try_get(album_json, release_year_of),
            # Every track is attributed to the song extractor, whichever
            # extractor produced it
            'extractor_key': MusicdexSongIE.ie_key(),
            'extractor': 'MusicdexSong',
        }
31
32
class MusicdexSongIE(MusicdexBaseIE):
    # Extractor for a single musicdex.org track page.
    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/track/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.musicdex.org/track/306/dual-existence',
        'info_dict': {
            'id': '306',
            'ext': 'mp3',
            'title': 'dual existence',
            'description': '#NIPPONSEI @ IRC.RIZON.NET',
            'track': 'dual existence',
            'track_number': 1,
            'duration': 266000,
            'genres': ['Anime'],
            'like_count': int,
            'view_count': int,
            'artists': ['fripSide'],
            'album_artists': ['fripSide'],
            'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png',
            'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence',
            'release_year': 2020,
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        """Fetch the track JSON for the URL's id and convert it to an info dict."""
        track_id = self._match_id(url)
        api_url = f'https://www.musicdex.org/secure/tracks/{track_id}?defaultRelations=true'
        track = self._download_json(api_url, track_id)['track']
        # The album mapping is read from the track itself; fall back to an
        # empty mapping when it is absent or empty
        album = track.get('album') or {}
        return self._return_info(track, album, track_id)
fb62afd6
AG
63
64
class MusicdexAlbumIE(MusicdexBaseIE):
    # Extractor for a musicdex.org album page; yields a playlist of its tracks.
    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/album/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.musicdex.org/album/56/tenmon-and-eiichiro-yanagi-minori/ef-a-tale-of-memories-original-soundtrack-2-fortissimo',
        'playlist_mincount': 28,
        'info_dict': {
            'id': '56',
            'genres': ['OST'],
            'view_count': int,
            'artists': ['TENMON & Eiichiro Yanagi / minori'],
            'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~',
            'release_year': 2008,
            'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg',
        },
    }]

    def _real_extract(self, url):
        """Fetch the album JSON and return a playlist built from its tracks."""
        album_id = self._match_id(url)
        api_url = f'https://www.musicdex.org/secure/albums/{album_id}?defaultRelations=true'
        album = self._download_json(api_url, album_id)['album']

        track_entries = []
        for track in album.get('tracks') or []:
            # Tracks without a usable id are skipped
            if not track.get('id'):
                continue
            track_entries.append(self._return_info(track, album, track['id']))

        def names_of(items):
            return [item.get('name') for item in items or []]

        def release_year_of(album_json):
            return date_from_str(unified_strdate(album_json['release_date'])).year

        return {
            '_type': 'playlist',
            'id': album_id,
            'title': album.get('name'),
            'description': album.get('description'),
            'genres': names_of(album.get('genres')),
            'view_count': album.get('plays'),
            'artists': names_of(album.get('artists')),
            'thumbnail': format_field(album, 'image', 'https://www.musicdex.org/%s'),
            'release_year': try_get(album, release_year_of),
            'entries': track_entries,
        }
101
102
class MusicdexPageIE(MusicdexBaseIE):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
    # Base for the paginated extractors; subclasses provide _API_URL, a
    # %-template that takes the playlist id.

    def _entries(self, playlist_id):
        """Yield every item from each page, following ``next_page_url`` links.

        Iteration stops once a page carries no (truthy) ``next_page_url``.
        """
        page_url = self._API_URL % playlist_id
        while page_url:
            page = self._download_json(page_url, playlist_id)['pagination']
            for item in page.get('data') or []:
                yield item
            page_url = page.get('next_page_url')
110
111
class MusicdexArtistIE(MusicdexPageIE):
    # Extractor for a musicdex.org artist page; yields a playlist of the
    # tracks found in the artist's paginated album listing.
    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/artist/(?P<id>\d+)'
    _API_URL = 'https://www.musicdex.org/secure/artists/%s/albums?page=1'

    _TESTS = [{
        'url': 'https://www.musicdex.org/artist/11/fripside',
        'playlist_mincount': 28,
        'info_dict': {
            'id': '11',
            'view_count': int,
            'title': 'fripSide',
            'thumbnail': 'https://www.musicdex.org/storage/artist/ZmOz0lN2vsweegB660em3xWffCjLPmTQHqJls5Xx.jpg',
        },
    }]

    def _real_extract(self, url):
        """Fetch the artist JSON, then collect the tracks of every listed album."""
        artist_id = self._match_id(url)
        artist = self._download_json(
            f'https://www.musicdex.org/secure/artists/{artist_id}', artist_id)['artist']

        track_entries = []
        for album in self._entries(artist_id):
            for track in album.get('tracks') or []:
                # Tracks without a usable id are skipped
                if track.get('id'):
                    track_entries.append(self._return_info(track, album, track['id']))

        return {
            '_type': 'playlist',
            'id': artist_id,
            'title': artist.get('name'),
            'view_count': artist.get('plays'),
            'thumbnail': format_field(artist, 'image_small', 'https://www.musicdex.org/%s'),
            'entries': track_entries,
        }
142
143
class MusicdexPlaylistIE(MusicdexPageIE):
    # Extractor for a musicdex.org user playlist; yields a playlist of the
    # tracks found in its paginated track listing.
    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/playlist/(?P<id>\d+)'
    _API_URL = 'https://www.musicdex.org/secure/playlists/%s/tracks?perPage=10000&page=1'

    _TESTS = [{
        'url': 'https://www.musicdex.org/playlist/9/test',
        'playlist_mincount': 73,
        'info_dict': {
            'id': '9',
            'view_count': int,
            'title': 'Test',
            'thumbnail': 'https://www.musicdex.org/storage/album/jXATI79f0IbQ2sgsKYOYRCW3zRwF3XsfHhzITCuJ.jpg',
            'description': 'Test 123 123 21312 32121321321321312',
        },
    }]

    def _real_extract(self, url):
        """Fetch the playlist JSON, then convert each listed track to an entry."""
        playlist_id = self._match_id(url)
        playlist = self._download_json(
            f'https://www.musicdex.org/secure/playlists/{playlist_id}', playlist_id)['playlist']

        track_entries = []
        for track in self._entries(playlist_id):
            # Tracks without a usable id are skipped
            if not track.get('id'):
                continue
            # Each track's own album mapping is used; empty mapping if absent
            album = track.get('album') or {}
            track_entries.append(self._return_info(track, album, track['id']))

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist.get('name'),
            'description': playlist.get('description'),
            'view_count': playlist.get('plays'),
            'thumbnail': format_field(playlist, 'image', 'https://www.musicdex.org/%s'),
            'entries': track_entries,
        }