]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/yandexmusic.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / yandexmusic.py
CommitLineData
4c603938 1import hashlib
bc2ca1bb 2import itertools
4c603938
MA
3
4from .common import InfoExtractor
47fe42e1 5from ..utils import (
ae7d31af 6 ExtractorError,
47fe42e1 7 float_or_none,
e897bd82 8 int_or_none,
0250161c 9 try_get,
47fe42e1 10)
4c603938 11
4c603938 12
class YandexMusicBaseIE(InfoExtractor):
    """Common helpers shared by the Yandex Music extractors.

    Provides the URL prefix used by every concrete extractor, wrappers around
    the download helpers that turn API errors and CAPTCHA walls into
    ``ExtractorError``, and ``_call_api`` for the ``handlers/*.jsx`` endpoints.
    """

    _VALID_URL_BASE = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by|com)'

    # Text shown on the anti-bot page served instead of the requested content.
    _CAPTCHA_PAGE_MARKER = (
        'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.')

    @staticmethod
    def _handle_error(response):
        """Raise ``ExtractorError`` if a decoded JSON *response* signals failure.

        Only dict responses are inspected; anything else is ignored.
        """
        if not isinstance(response, dict):
            return
        error = response.get('error')
        if error:
            raise ExtractorError(error, expected=True)
        if response.get('type') == 'captcha' or 'captcha' in response:
            YandexMusicBaseIE._raise_captcha()

    @staticmethod
    def _raise_captcha():
        """Raise an expected ``ExtractorError`` explaining the CAPTCHA block."""
        raise ExtractorError(
            'YandexMusic has considered yt-dlp requests automated and '
            'asks you to solve a CAPTCHA. You can either wait for some '
            'time until unblocked and optionally use --sleep-interval '
            'in future or alternatively you can go to https://music.yandex.ru/ '
            'solve CAPTCHA, then export cookies and pass cookie file to '
            'yt-dlp with --cookies',
            expected=True)

    def _download_webpage_handle(self, *args, **kwargs):
        """Download a page and fail early if the CAPTCHA page came back.

        Returns whatever the parent implementation returned, unchanged.
        """
        result = super()._download_webpage_handle(*args, **kwargs)
        # The parent helper is documented to return a (content, url handle)
        # tuple, or False on a non-fatal failure. Searching that object
        # directly would compare the marker against whole tuple elements
        # (never matching) or raise TypeError on False, so look at the page
        # text only. A bare string result is still accepted.
        page = result[0] if isinstance(result, tuple) and result else result
        if isinstance(page, str) and self._CAPTCHA_PAGE_MARKER in page:
            self._raise_captcha()
        return result

    def _download_json(self, *args, **kwargs):
        """Download JSON and run it through ``_handle_error`` before returning it."""
        response = super()._download_json(*args, **kwargs)
        self._handle_error(response)
        return response

    def _call_api(self, ep, tld, url, item_id, note, query):
        """Request ``https://music.yandex.<tld>/handlers/<ep>.jsx``.

        *url* is sent as both ``Referer`` and ``X-Retpath-Y``; *query* becomes
        the query string. The request is made with ``fatal=False``, so callers
        should presumably be prepared for a falsy result on failure.
        """
        return self._download_json(
            f'https://music.yandex.{tld}/handlers/{ep}.jsx',
            item_id, note,
            fatal=False,
            headers={
                'Referer': url,
                'X-Requested-With': 'XMLHttpRequest',
                'X-Retpath-Y': url,
            },
            query=query)
58
ae7d31af
S
59
class YandexMusicTrackIE(YandexMusicBaseIE):
    """Extractor for a single track addressed as ``/album/<id>/track/<id>``."""

    IE_NAME = 'yandexmusic:track'
    IE_DESC = 'Яндекс.Музыка - Трек'
    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/album/(?P<album_id>\d+)/track/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://music.yandex.ru/album/540508/track/4878838',
        'md5': 'dec8b661f12027ceaba33318787fff76',
        'info_dict': {
            'id': '4878838',
            'ext': 'mp3',
            'title': 'md5:c63e19341fdbe84e43425a30bc777856',
            'filesize': int,
            'duration': 193.04,
            'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
            'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
            'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
            'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
            'release_year': 2009,
        },
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        # multiple disks
        'url': 'http://music.yandex.ru/album/3840501/track/705105',
        'md5': '82a54e9e787301dd45aba093cf6e58c0',
        'info_dict': {
            'id': '705105',
            'ext': 'mp3',
            'title': 'md5:f86d4a9188279860a83000277024c1a6',
            'filesize': int,
            'duration': 239.27,
            'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
            'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
            'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
            'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
            'release_year': 2016,
            'genre': 'pop',
            'disc_number': 2,
            'track_number': 9,
        },
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        'url': 'http://music.yandex.com/album/540508/track/4878838',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict (direct mp3 URL plus metadata) for one track."""
        mobj = self._match_valid_url(url)
        tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')

        track = self._call_api(
            'track', tld, url, track_id, 'Downloading track JSON',
            {'track': f'{track_id}:{album_id}'})['track']
        track_title = track['title']

        # First hop: metadata describing where the file location can be fetched.
        download_data = self._download_json(
            f'https://music.yandex.ru/api/v2.1/handlers/track/{track_id}:{album_id}/web-album_track-track-track-main/download/m',
            track_id, 'Downloading track location url JSON', query={'hq': 1}, headers={'X-Retpath-Y': url})

        # Second hop: host/path/ts/s fields used to assemble the signed URL.
        fd_data = self._download_json(
            download_data['src'], track_id,
            'Downloading track location JSON',
            query={'format': 'json'})
        # The URL key is the MD5 of a fixed salt, the path without its first
        # character, and the "s" field from the location JSON.
        key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode()).hexdigest()
        # No trailing whitespace: it would become part of the track-id value.
        f_url = 'http://{}/get-mp3/{}/{}?track-id={}'.format(fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])

        thumbnail = None
        # try_get tolerates a missing, empty or malformed "albums" list, which
        # the album section further down is likewise prepared for.
        cover_uri = try_get(track, lambda x: x['albums'][0]['coverUri'], str)
        if cover_uri:
            thumbnail = cover_uri.replace('%%', 'orig')
            if not thumbnail.startswith('http'):
                thumbnail = 'http://' + thumbnail

        track_info = {
            'id': track_id,
            'ext': 'mp3',
            'url': f_url,
            'filesize': int_or_none(track.get('fileSize')),
            'duration': float_or_none(track.get('durationMs'), 1000),
            'thumbnail': thumbnail,
            'track': track_title,
            'acodec': download_data.get('codec'),
            'abr': int_or_none(download_data.get('bitrate')),
        }

        def extract_artist_name(artist):
            """Return the artist name followed by its "decomposed" parts, if any."""
            decomposed = artist.get('decomposed')
            if not isinstance(decomposed, list):
                return artist['name']
            parts = [artist['name']]
            for element in decomposed:
                # Elements are either dicts carrying a name or plain strings.
                if isinstance(element, dict) and element.get('name'):
                    parts.append(element['name'])
                elif isinstance(element, str):
                    parts.append(element)
            return ''.join(parts)

        def extract_artist(artist_list):
            """Join the names of all named artists with ', '; None if there are none."""
            if artist_list and isinstance(artist_list, list):
                artists_names = [
                    extract_artist_name(a) for a in artist_list
                    if isinstance(a, dict) and a.get('name')]
                if artists_names:
                    return ', '.join(artists_names)

        albums = track.get('albums')
        if albums and isinstance(albums, list):
            album = albums[0]
            if isinstance(album, dict):
                year = album.get('year')
                disc_number = int_or_none(try_get(
                    album, lambda x: x['trackPosition']['volume']))
                track_number = int_or_none(try_get(
                    album, lambda x: x['trackPosition']['index']))
                track_info.update({
                    'album': album.get('title'),
                    'album_artist': extract_artist(album.get('artists')),
                    'release_year': int_or_none(year),
                    'genre': album.get('genre'),
                    'disc_number': disc_number,
                    'track_number': track_number,
                })

        track_artist = extract_artist(track.get('artists'))
        if track_artist:
            track_info.update({
                'artist': track_artist,
                'title': f'{track_artist} - {track_title}',
            })
        else:
            track_info['title'] = track_title

        return track_info
4c603938 191
4c603938 192
class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
    """Base for extractors that turn a list of track dicts into a playlist."""

    def _extract_tracks(self, source, item_id, url, tld):
        """Return ``source['tracks']`` completed with any tracks it lacks.

        ``source['trackIds']`` lists every track id; ids with no matching
        entry in ``source['tracks']`` are fetched through the
        ``track-entries`` endpoint and appended to that same list in place.
        """
        tracks = source['tracks']
        track_ids = [str(track_id) for track_id in source['trackIds']]

        # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
        # missing tracks should be retrieved manually.
        if len(tracks) < len(track_ids):
            present_track_ids = {
                str(track['id'])
                for track in tracks if track.get('id')}
            missing_track_ids = [
                track_id for track_id in track_ids
                if track_id not in present_track_ids]
            # Request missing tracks in chunks to avoid exceeding max HTTP header size,
            # see https://github.com/ytdl-org/youtube-dl/issues/27355
            _TRACKS_PER_CHUNK = 250
            # Stepping through a range makes zero requests when nothing is
            # missing (e.g. duplicated ids in trackIds) instead of relying on
            # an assert, which would crash here and is stripped under -O.
            chunk_starts = range(0, len(missing_track_ids), _TRACKS_PER_CHUNK)
            for chunk_num, start in enumerate(chunk_starts, 1):
                missing_track_ids_req = missing_track_ids[start:start + _TRACKS_PER_CHUNK]
                missing_tracks = self._call_api(
                    'track-entries', tld, url, item_id,
                    f'Downloading missing tracks JSON chunk {chunk_num}', {
                        'entries': ','.join(missing_track_ids_req),
                        'lang': tld,
                        'external-domain': f'music.yandex.{tld}',
                        'overembed': 'false',
                        'strict': 'true',
                    })
                # A failed (falsy) chunk is skipped rather than aborting the run.
                if missing_tracks:
                    tracks.extend(missing_tracks)

        return tracks

    def _build_playlist(self, tracks):
        """Build ``url_result`` entries for every track that has usable ids.

        Tracks without an id, or whose first album is missing, is not a dict,
        or has no id, are skipped.
        """
        entries = []
        for track in tracks:
            track_id = track.get('id') or track.get('realId')
            if not track_id:
                continue
            albums = track.get('albums')
            if not albums or not isinstance(albums, list):
                continue
            album = albums[0]
            if not isinstance(album, dict):
                continue
            album_id = album.get('id')
            if not album_id:
                continue
            entries.append(self.url_result(
                f'http://music.yandex.ru/album/{album_id}/track/{track_id}',
                ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
        return entries
e7c14660
S
250
251
class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
    """Extractor for a whole album, returned as a playlist of its tracks."""

    IE_NAME = 'yandexmusic:album'
    IE_DESC = 'Яндекс.Музыка - Альбом'
    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/album/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://music.yandex.ru/album/540508',
        'info_dict': {
            'id': '540508',
            'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
        },
        'playlist_count': 50,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        'url': 'https://music.yandex.ru/album/3840501',
        'info_dict': {
            'id': '3840501',
            'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
        },
        'playlist_count': 33,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        # empty artists
        'url': 'https://music.yandex.ru/album/9091882',
        'info_dict': {
            'id': '9091882',
            'title': 'ТЕД на русском',
        },
        'playlist_count': 187,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs the track extractor accepts; otherwise defer to the parent."""
        # Track URLs start with the album prefix, so they must be ruled out first.
        if YandexMusicTrackIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Return a playlist titled ``[<artist> - ]<title>[ (<year>)]``."""
        match = self._match_valid_url(url)
        tld = match.group('tld')
        album_id = match.group('id')

        album = self._call_api(
            'album', tld, url, album_id, 'Downloading album JSON',
            {'album': album_id})

        # "volumes" is a list of track lists; flatten it, keeping the order.
        all_tracks = []
        for volume in album['volumes']:
            all_tracks.extend(volume)
        entries = self._build_playlist(all_tracks)

        playlist_title = album['title']
        first_artist = try_get(album, lambda x: x['artists'][0]['name'], str)
        if first_artist:
            playlist_title = f'{first_artist} - {playlist_title}'
        release_year = album.get('year')
        if release_year:
            playlist_title += f' ({release_year})'

        return self.playlist_result(entries, str(album['id']), playlist_title)
4c603938 307
4c603938 308
class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
    """Extractor for a user playlist (``/users/<user>/playlists/<id>``)."""

    IE_NAME = 'yandexmusic:playlist'
    IE_DESC = 'Яндекс.Музыка - Плейлист'
    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
        'info_dict': {
            'id': '1245',
            'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
            'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
        },
        'playlist_count': 5,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
        'only_matching': True,
    }, {
        # playlist exceeding the limit of 150 tracks (see
        # https://github.com/ytdl-org/youtube-dl/issues/6666)
        'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
        'info_dict': {
            'id': '1364',
            'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
        },
        'playlist_mincount': 437,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    def _real_extract(self, url):
        """Fetch the playlist JSON, complete its track list, and build the result."""
        match = self._match_valid_url(url)
        tld = match.group('tld')
        owner = match.group('user')
        playlist_id = match.group('id')

        api_query = {
            'owner': owner,
            'kinds': playlist_id,
            'light': 'true',
            'lang': tld,
            'external-domain': f'music.yandex.{tld}',
            'overembed': 'false',
        }
        response = self._call_api(
            'playlist', tld, url, playlist_id, 'Downloading playlist JSON', api_query)
        playlist = response['playlist']

        # The playlist JSON may carry only part of the tracks; the helper
        # fetches whatever is missing.
        all_tracks = self._extract_tracks(playlist, playlist_id, url, tld)
        entries = self._build_playlist(all_tracks)

        return self.playlist_result(
            entries, str(playlist_id),
            playlist.get('title'), playlist.get('description'))
29f7c58a 360
361
class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
    """Base for the artist extractors.

    The methods read ``_ARTIST_WHAT`` and ``_ARTIST_SORT``, which are not
    defined here and have to be supplied by subclasses.
    """

    def _call_artist(self, tld, url, artist_id):
        """Query the ``artist`` endpoint for the section named by ``_ARTIST_WHAT``."""
        section = self._ARTIST_WHAT
        api_query = {
            'artist': artist_id,
            'what': section,
            # A falsy sort setting is sent as an empty string.
            'sort': self._ARTIST_SORT or '',
            'dir': '',
            'period': '',
            'lang': tld,
            'external-domain': f'music.yandex.{tld}',
            'overembed': 'false',
        }
        return self._call_api(
            'artist', tld, url, artist_id,
            f'Downloading artist {section} JSON', api_query)

    def _real_extract(self, url):
        """Return the artist's tracks as a playlist titled with the artist name."""
        match = self._match_valid_url(url)
        tld = match.group('tld')
        artist_id = match.group('id')

        data = self._call_artist(tld, url, artist_id)
        all_tracks = self._extract_tracks(data, artist_id, url, tld)
        artist_name = try_get(data, lambda x: x['artist']['name'], str)

        entries = self._build_playlist(all_tracks)
        return self.playlist_result(entries, artist_id, artist_name)
386
387
class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
    """Extractor for an artist's track listing (``/artist/<id>/tracks``)."""

    IE_NAME = 'yandexmusic:artist:tracks'
    IE_DESC = 'Яндекс.Музыка - Артист - Треки'
    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/artist/(?P<id>\d+)/tracks'

    _TESTS = [{
        'url': 'https://music.yandex.ru/artist/617526/tracks',
        'info_dict': {
            'id': '617526',
            'title': 'md5:131aef29d45fd5a965ca613e708c040b',
        },
        'playlist_count': 507,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    _ARTIST_SORT = ''
    _ARTIST_WHAT = 'tracks'

    def _real_extract(self, url):
        """Return the artist's tracks as a playlist titled ``<artist> - Треки``."""
        match = self._match_valid_url(url)
        tld = match.group('tld')
        artist_id = match.group('id')

        data = self._call_artist(tld, url, artist_id)
        all_tracks = self._extract_tracks(data, artist_id, url, tld)

        artist_name = try_get(data, lambda x: x['artist']['name'], str)
        # Fall back to the numeric id when the name is unavailable.
        title = '{} - {}'.format(artist_name or artist_id, 'Треки')

        entries = self._build_playlist(all_tracks)
        return self.playlist_result(entries, artist_id, title)
416
417
class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
    """Extractor for an artist's album listing (``/artist/<id>/albums``)."""

    IE_NAME = 'yandexmusic:artist:albums'
    IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/artist/(?P<id>\d+)/albums'

    _TESTS = [{
        'url': 'https://music.yandex.ru/artist/617526/albums',
        'info_dict': {
            'id': '617526',
            'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
        },
        'playlist_count': 8,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    _ARTIST_SORT = 'year'
    _ARTIST_WHAT = 'albums'

    def _real_extract(self, url):
        """Return a playlist of album URLs titled ``<artist> - Альбомы``."""
        match = self._match_valid_url(url)
        tld = match.group('tld')
        artist_id = match.group('id')
        data = self._call_artist(tld, url, artist_id)

        entries = []
        for album in data['albums']:
            # Skip anything that is not a dict or carries no album id.
            album_id = album.get('id') if isinstance(album, dict) else None
            if not album_id:
                continue
            album_url = f'http://music.yandex.ru/album/{album_id}'
            entries.append(self.url_result(
                album_url, ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))

        artist_name = try_get(data, lambda x: x['artist']['name'], str)
        # Fall back to the numeric id when the name is unavailable.
        title = '{} - {}'.format(artist_name or artist_id, 'Альбомы')
        return self.playlist_result(entries, artist_id, title)