4 from .common
import InfoExtractor
13 class YandexMusicBaseIE(InfoExtractor
):
14 _VALID_URL_BASE
= r
'https?://music\.yandex\.(?P<tld>ru|kz|ua|by|com)'
17 def _handle_error(response
):
18 if isinstance(response
, dict):
19 error
= response
.get('error')
21 raise ExtractorError(error
, expected
=True)
22 if response
.get('type') == 'captcha' or 'captcha' in response
:
23 YandexMusicBaseIE
._raise
_captcha
()
28 'YandexMusic has considered yt-dlp requests automated and '
29 'asks you to solve a CAPTCHA. You can either wait for some '
30 'time until unblocked and optionally use --sleep-interval '
31 'in future or alternatively you can go to https://music.yandex.ru/ '
32 'solve CAPTCHA, then export cookies and pass cookie file to '
33 'yt-dlp with --cookies',
36 def _download_webpage_handle(self
, *args
, **kwargs
):
37 webpage
= super()._download
_webpage
_handle
(*args
, **kwargs
)
38 if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage
:
42 def _download_json(self
, *args
, **kwargs
):
43 response
= super()._download
_json
(*args
, **kwargs
)
44 self
._handle
_error
(response
)
47 def _call_api(self
, ep
, tld
, url
, item_id
, note
, query
):
48 return self
._download
_json
(
49 f
'https://music.yandex.{tld}/handlers/{ep}.jsx',
54 'X-Requested-With': 'XMLHttpRequest',
60 class YandexMusicTrackIE(YandexMusicBaseIE
):
61 IE_NAME
= 'yandexmusic:track'
62 IE_DESC
= 'Яндекс.Музыка - Трек'
63 _VALID_URL
= rf
'{YandexMusicBaseIE._VALID_URL_BASE}/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
66 'url': 'http://music.yandex.ru/album/540508/track/4878838',
67 'md5': 'dec8b661f12027ceaba33318787fff76',
71 'title': 'md5:c63e19341fdbe84e43425a30bc777856',
74 'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
75 'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
76 'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
77 'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
80 # 'skip': 'Travis CI servers blocked by YandexMusic',
83 'url': 'http://music.yandex.ru/album/3840501/track/705105',
84 'md5': '82a54e9e787301dd45aba093cf6e58c0',
88 'title': 'md5:f86d4a9188279860a83000277024c1a6',
91 'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
92 'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
93 'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
94 'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
100 # 'skip': 'Travis CI servers blocked by YandexMusic',
102 'url': 'http://music.yandex.com/album/540508/track/4878838',
103 'only_matching': True,
106 def _real_extract(self
, url
):
107 mobj
= self
._match
_valid
_url
(url
)
108 tld
, album_id
, track_id
= mobj
.group('tld'), mobj
.group('album_id'), mobj
.group('id')
110 track
= self
._call
_api
(
111 'track', tld
, url
, track_id
, 'Downloading track JSON',
112 {'track': f'{track_id}
:{album_id}
'})['track
']
113 track_title = track['title
']
115 download_data = self._download_json(
116 f'https
://music
.yandex
.ru
/api
/v2
.1
/handlers
/track
/{track_id}
:{album_id}
/web
-album_track
-track
-track
-main
/download
/m
',
117 track_id, 'Downloading track location url JSON
', query={'hq': 1}, headers={'X-Retpath-Y': url})
119 fd_data = self._download_json(
120 download_data['src
'], track_id,
121 'Downloading track location JSON
',
122 query={'format': 'json'})
123 key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA
' + fd_data['path
'][1:] + fd_data['s
']).encode()).hexdigest()
124 f_url = 'http
://{}/get-mp3/{}
/{}?track-id={}
'.format(fd_data['host
'], key, fd_data['ts
'] + fd_data['path
'], track['id'])
127 cover_uri = track.get('albums
', [{}])[0].get('coverUri
')
129 thumbnail = cover_uri.replace('%%', 'orig
')
130 if not thumbnail.startswith('http
'):
131 thumbnail = 'http
://' + thumbnail
137 'filesize
': int_or_none(track.get('fileSize
')),
138 'duration
': float_or_none(track.get('durationMs
'), 1000),
139 'thumbnail
': thumbnail,
140 'track
': track_title,
141 'acodec
': download_data.get('codec
'),
142 'abr
': int_or_none(download_data.get('bitrate
')),
def extract_artist_name(artist):
    """Return the display name for a single artist entry.

    ``artist`` is a mapping with a mandatory ``'name'`` key. When it also
    carries a ``'decomposed'`` list, the pieces of that list are appended
    to the base name in order: dict elements contribute their non-empty
    ``'name'`` value, plain strings are appended verbatim, and anything
    else is ignored.

    Raises ``KeyError`` if ``artist`` has no ``'name'`` key.
    """
    decomposed = artist.get('decomposed')
    if not isinstance(decomposed, list):
        # No usable decomposition: the plain name is the whole answer.
        return artist['name']
    parts = [artist['name']]
    for element in decomposed:
        if isinstance(element, dict) and element.get('name'):
            parts.append(element['name'])
        elif isinstance(element, str):
            # String elements are joined as-is, with no separator added.
            parts.append(element)
    return ''.join(parts)
157 def extract_artist(artist_list):
158 if artist_list and isinstance(artist_list, list):
159 artists_names = [extract_artist_name(a) for a in artist_list if a.get('name
')]
161 return ', '.join(artists_names)
163 albums = track.get('albums
')
164 if albums and isinstance(albums, list):
166 if isinstance(album, dict):
167 year = album.get('year
')
168 disc_number = int_or_none(try_get(
169 album, lambda x: x['trackPosition
']['volume
']))
170 track_number = int_or_none(try_get(
171 album, lambda x: x['trackPosition
']['index
']))
173 'album
': album.get('title
'),
174 'album_artist
': extract_artist(album.get('artists
')),
175 'release_year
': int_or_none(year),
176 'genre
': album.get('genre
'),
177 'disc_number
': disc_number,
178 'track_number
': track_number,
181 track_artist = extract_artist(track.get('artists
'))
184 'artist
': track_artist,
185 'title
': f'{track_artist}
- {track_title}
',
188 track_info['title
'] = track_title
193 class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
194 def _extract_tracks(self, source, item_id, url, tld):
195 tracks = source['tracks
']
196 track_ids = [str(track_id) for track_id in source['trackIds
']]
198 # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
199 # missing tracks should be retrieved manually.
200 if len(tracks) < len(track_ids):
201 present_track_ids = {
203 for track in tracks if track.get('id')}
204 missing_track_ids = [
205 track_id for track_id in track_ids
206 if track_id not in present_track_ids]
207 # Request missing tracks in chunks to avoid exceeding max HTTP header size,
208 # see https://github.com/ytdl-org/youtube-dl/issues/27355
209 _TRACKS_PER_CHUNK = 250
210 for chunk_num in itertools.count(0):
211 start = chunk_num * _TRACKS_PER_CHUNK
212 end = start + _TRACKS_PER_CHUNK
213 missing_track_ids_req = missing_track_ids[start:end]
214 assert missing_track_ids_req
215 missing_tracks = self._call_api(
216 'track
-entries
', tld, url, item_id,
217 f'Downloading missing tracks JSON chunk {chunk_num + 1}
', {
218 'entries
': ','.join(missing_track_ids_req),
220 'external
-domain
': f'music
.yandex
.{tld}
',
221 'overembed
': 'false
',
225 tracks.extend(missing_tracks)
226 if end >= len(missing_track_ids):
231 def _build_playlist(self, tracks):
234 track_id = track.get('id') or track.get('realId
')
237 albums = track.get('albums
')
238 if not albums or not isinstance(albums, list):
241 if not isinstance(album, dict):
243 album_id = album.get('id')
246 entries.append(self.url_result(
247 f'http
://music
.yandex
.ru
/album
/{album_id}
/track
/{track_id}
',
248 ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
252 class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
253 IE_NAME = 'yandexmusic
:album
'
254 IE_DESC = 'Яндекс
.Музыка
- Альбом
'
255 _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}
/album
/(?P
<id>\d
+)'
258 'url
': 'http
://music
.yandex
.ru
/album
/540508',
261 'title
': 'md5
:7ed1c3567f28d14be9f61179116f5571
',
263 'playlist_count
': 50,
264 # 'skip
': 'Travis CI servers blocked by YandexMusic
',
266 'url
': 'https
://music
.yandex
.ru
/album
/3840501',
269 'title
': 'md5
:36733472cdaa7dcb1fd9473f7da8e50f
',
271 'playlist_count
': 33,
272 # 'skip
': 'Travis CI servers blocked by YandexMusic
',
275 'url
': 'https
://music
.yandex
.ru
/album
/9091882',
278 'title
': 'ТЕД на русском
',
280 'playlist_count
': 187,
284 def suitable(cls, url):
285 return False if YandexMusicTrackIE.suitable(url) else super().suitable(url)
287 def _real_extract(self, url):
288 mobj = self._match_valid_url(url)
289 tld = mobj.group('tld
')
290 album_id = mobj.group('id')
292 album = self._call_api(
293 'album
', tld, url, album_id, 'Downloading album JSON
',
296 entries = self._build_playlist([track for volume in album['volumes
'] for track in volume])
298 title = album['title
']
299 artist = try_get(album, lambda x: x['artists
'][0]['name
'], str)
301 title = f'{artist}
- {title}
'
302 year = album.get('year
')
304 title += f' ({year}
)'
306 return self.playlist_result(entries, str(album['id']), title)
309 class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
310 IE_NAME = 'yandexmusic
:playlist
'
311 IE_DESC = 'Яндекс
.Музыка
- Плейлист
'
312 _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}
/users
/(?P
<user
>[^
/]+)/playlists
/(?P
<id>\d
+)'
315 'url
': 'http
://music
.yandex
.ru
/users
/music
.partners
/playlists
/1245',
318 'title
': 'md5
:841559b3fe2b998eca88d0d2e22a3097
',
319 'description
': 'md5
:3b9f27b0efbe53f2ee1e844d07155cc9
',
322 # 'skip
': 'Travis CI servers blocked by YandexMusic
',
324 'url
': 'https
://music
.yandex
.ru
/users
/ya
.playlist
/playlists
/1036',
325 'only_matching
': True,
327 # playlist exceeding the limit of 150 tracks (see
328 # https://github.com/ytdl-org/youtube-dl/issues/6666)
329 'url
': 'https
://music
.yandex
.ru
/users
/mesiaz
/playlists
/1364',
332 'title
': 'md5
:b3b400f997d3f878a13ae0699653f7db
',
334 'playlist_mincount
': 437,
335 # 'skip
': 'Travis CI servers blocked by YandexMusic
',
338 def _real_extract(self, url):
339 mobj = self._match_valid_url(url)
340 tld = mobj.group('tld
')
341 user = mobj.group('user
')
342 playlist_id = mobj.group('id')
344 playlist = self._call_api(
345 'playlist
', tld, url, playlist_id, 'Downloading playlist JSON
', {
347 'kinds
': playlist_id,
350 'external
-domain
': f'music
.yandex
.{tld}
',
351 'overembed
': 'false
',
354 tracks = self._extract_tracks(playlist, playlist_id, url, tld)
356 return self.playlist_result(
357 self._build_playlist(tracks),
359 playlist.get('title
'), playlist.get('description
'))
362 class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
363 def _call_artist(self, tld, url, artist_id):
364 return self._call_api(
365 'artist
', tld, url, artist_id,
366 f'Downloading artist {self._ARTIST_WHAT} JSON
', {
368 'what
': self._ARTIST_WHAT,
369 'sort
': self._ARTIST_SORT or '',
373 'external
-domain
': f'music
.yandex
.{tld}
',
374 'overembed
': 'false
',
def _real_extract(self, url):
    """Build a playlist result for an artist URL.

    Reads the ``tld`` and ``id`` groups from the matched URL, fetches the
    artist data through ``_call_artist``, gathers its tracks with
    ``_extract_tracks`` and returns them as a playlist whose id is the
    artist id and whose title is the artist's name (when the response
    provides it as a string).
    """
    mobj = self._match_valid_url(url)
    tld = mobj.group('tld')
    artist_id = mobj.group('id')
    data = self._call_artist(tld, url, artist_id)
    tracks = self._extract_tracks(data, artist_id, url, tld)
    # try_get is given ``str`` as the expected type for the artist name.
    title = try_get(data, lambda x: x['artist']['name'], str)
    return self.playlist_result(
        self._build_playlist(tracks), artist_id, title)
388 class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
389 IE_NAME = 'yandexmusic
:artist
:tracks
'
390 IE_DESC = 'Яндекс
.Музыка
- Артист
- Треки
'
391 _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}
/artist
/(?P
<id>\d
+)/tracks
'
394 'url
': 'https
://music
.yandex
.ru
/artist
/617526/tracks
',
397 'title
': 'md5
:131aef29d45fd5a965ca613e708c040b
',
399 'playlist_count
': 507,
400 # 'skip
': 'Travis CI servers blocked by YandexMusic
',
404 _ARTIST_WHAT = 'tracks
'
def _real_extract(self, url):
    """Build a playlist of an artist's tracks from an artist tracks URL.

    Reads the ``tld`` and ``id`` groups from the matched URL, fetches the
    artist data through ``_call_artist`` and gathers the tracks with
    ``_extract_tracks``. The playlist title is ``'<artist> - Треки'``,
    using the artist id in place of the name when the name is
    unavailable.
    """
    mobj = self._match_valid_url(url)
    tld = mobj.group('tld')
    artist_id = mobj.group('id')
    data = self._call_artist(tld, url, artist_id)
    tracks = self._extract_tracks(data, artist_id, url, tld)
    artist = try_get(data, lambda x: x['artist']['name'], str)
    # Fall back to the numeric id so the title is never 'None - Треки'.
    title = f'{artist or artist_id} - Треки'
    return self.playlist_result(
        self._build_playlist(tracks), artist_id, title)
418 class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
419 IE_NAME = 'yandexmusic
:artist
:albums
'
420 IE_DESC = 'Яндекс
.Музыка
- Артист
- Альбомы
'
421 _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}
/artist
/(?P
<id>\d
+)/albums
'
424 'url
': 'https
://music
.yandex
.ru
/artist
/617526/albums
',
427 'title
': 'md5
:55dc58d5c85699b7fb41ee926700236c
',
430 # 'skip
': 'Travis CI servers blocked by YandexMusic
',
433 _ARTIST_SORT = 'year
'
434 _ARTIST_WHAT = 'albums
'
436 def _real_extract(self, url):
437 mobj = self._match_valid_url(url)
438 tld = mobj.group('tld
')
439 artist_id = mobj.group('id')
440 data = self._call_artist(tld, url, artist_id)
442 for album in data['albums
']:
443 if not isinstance(album, dict):
445 album_id = album.get('id')
448 entries.append(self.url_result(
449 f'http
://music
.yandex
.ru
/album
/{album_id}
',
450 ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))
451 artist = try_get(data, lambda x: x['artist
']['name
'], str)
452 title = '{} - {}
'.format(artist or artist_id, 'Альбомы
')
453 return self.playlist_result(entries, artist_id, title)