]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/yandexmusic.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / yandexmusic.py
CommitLineData
4c603938 1import hashlib
bc2ca1bb 2import itertools
4c603938
MA
3
4from .common import InfoExtractor
6e6bc8da 5from ..compat import compat_str
47fe42e1 6from ..utils import (
ae7d31af 7 ExtractorError,
47fe42e1 8 float_or_none,
e897bd82 9 int_or_none,
0250161c 10 try_get,
47fe42e1 11)
4c603938 12
4c603938 13
class YandexMusicBaseIE(InfoExtractor):
    """Common helpers for the Yandex.Music extractors.

    Overrides the download helpers so that error payloads and CAPTCHA
    challenges are reported as expected ``ExtractorError``s, and provides
    ``_call_api`` for the ``handlers/*.jsx`` endpoints.
    """

    _VALID_URL_BASE = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by|com)'

    @staticmethod
    def _handle_error(response):
        """Raise ``ExtractorError`` if a JSON response reports an error or a CAPTCHA.

        Non-dict responses are ignored.
        """
        if isinstance(response, dict):
            error = response.get('error')
            if error:
                raise ExtractorError(error, expected=True)
            if response.get('type') == 'captcha' or 'captcha' in response:
                YandexMusicBaseIE._raise_captcha()

    @staticmethod
    def _raise_captcha():
        """Raise an expected ``ExtractorError`` explaining how to get past the CAPTCHA."""
        raise ExtractorError(
            'YandexMusic has considered yt-dlp requests automated and '
            'asks you to solve a CAPTCHA. You can either wait for some '
            'time until unblocked and optionally use --sleep-interval '
            'in future or alternatively you can go to https://music.yandex.ru/ '
            'solve CAPTCHA, then export cookies and pass cookie file to '
            'yt-dlp with --cookies',
            expected=True)

    def _download_webpage_handle(self, *args, **kwargs):
        """Download a page via the parent class and fail early on the CAPTCHA page.

        The parent's return value is passed through unchanged.
        """
        result = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
        # NOTE(review): the parent is expected to return a (content, handle)
        # pair, or a falsy value on a non-fatal failure - confirm against
        # InfoExtractor. Only the page text is inspected; a plain string
        # result is still accepted as well.
        page = result[0] if isinstance(result, (tuple, list)) and result else result
        if isinstance(page, compat_str) and 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in page:
            self._raise_captcha()
        return result

    def _download_json(self, *args, **kwargs):
        """Download JSON via the parent class and run ``_handle_error`` on it."""
        response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
        self._handle_error(response)
        return response

    def _call_api(self, ep, tld, url, item_id, note, query):
        """Query ``https://music.yandex.<tld>/handlers/<ep>.jsx``.

        ``url`` is sent as the ``Referer`` and ``X-Retpath-Y`` headers. The
        request is non-fatal (``fatal=False``), so callers must be prepared
        for a result that is not the expected JSON object.
        """
        return self._download_json(
            'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep),
            item_id, note,
            fatal=False,
            headers={
                'Referer': url,
                'X-Requested-With': 'XMLHttpRequest',
                'X-Retpath-Y': url,
            },
            query=query)
59
ae7d31af
S
60
class YandexMusicTrackIE(YandexMusicBaseIE):
    """Extractor for a single Yandex.Music track (``/album/<id>/track/<id>``)."""

    IE_NAME = 'yandexmusic:track'
    IE_DESC = 'Яндекс.Музыка - Трек'
    _VALID_URL = r'%s/album/(?P<album_id>\d+)/track/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'http://music.yandex.ru/album/540508/track/4878838',
        'md5': 'dec8b661f12027ceaba33318787fff76',
        'info_dict': {
            'id': '4878838',
            'ext': 'mp3',
            'title': 'md5:c63e19341fdbe84e43425a30bc777856',
            'filesize': int,
            'duration': 193.04,
            'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
            'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
            'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
            'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
            'release_year': 2009,
        },
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        # multiple disks
        'url': 'http://music.yandex.ru/album/3840501/track/705105',
        'md5': '82a54e9e787301dd45aba093cf6e58c0',
        'info_dict': {
            'id': '705105',
            'ext': 'mp3',
            'title': 'md5:f86d4a9188279860a83000277024c1a6',
            'filesize': int,
            'duration': 239.27,
            'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
            'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
            'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
            'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
            'release_year': 2016,
            'genre': 'pop',
            'disc_number': 2,
            'track_number': 9,
        },
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        'url': 'http://music.yandex.com/album/540508/track/4878838',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for one track.

        Fetches the track metadata, resolves the download location in two
        further JSON requests, and derives the signed MP3 URL from it.

        Raises:
            ExtractorError: if the track metadata could not be retrieved.
        """
        mobj = self._match_valid_url(url)
        tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')

        # _call_api is non-fatal, so the response may not be a dict at all
        track = try_get(
            self._call_api(
                'track', tld, url, track_id, 'Downloading track JSON',
                {'track': '%s:%s' % (track_id, album_id)}),
            lambda x: x['track'], dict)
        if not track:
            raise ExtractorError('Unable to download track JSON')
        track_title = track['title']

        download_data = self._download_json(
            'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
            track_id, 'Downloading track location url JSON', query={'hq': 1}, headers={'X-Retpath-Y': url})

        fd_data = self._download_json(
            download_data['src'], track_id,
            'Downloading track location JSON',
            query={'format': 'json'})
        # The URL key is the MD5 of a fixed salt, the path without its first
        # character, and the 's' value from the location JSON
        key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
        f_url = 'http://%s/get-mp3/%s/%s?track-id=%s' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])

        thumbnail = None
        # Tolerates a missing, empty or malformed 'albums' list
        cover_uri = try_get(track, lambda x: x['albums'][0]['coverUri'], compat_str)
        if cover_uri:
            # '%%' is a literal placeholder in the cover URI
            thumbnail = cover_uri.replace('%%', 'orig')
            if not thumbnail.startswith('http'):
                thumbnail = 'http://' + thumbnail

        track_info = {
            'id': track_id,
            'ext': 'mp3',
            'url': f_url,
            'filesize': int_or_none(track.get('fileSize')),
            'duration': float_or_none(track.get('durationMs'), 1000),
            'thumbnail': thumbnail,
            'track': track_title,
            'acodec': download_data.get('codec'),
            'abr': int_or_none(download_data.get('bitrate')),
        }

        def extract_artist_name(artist):
            """Return the artist name with any 'decomposed' parts appended."""
            decomposed = artist.get('decomposed')
            if not isinstance(decomposed, list):
                return artist['name']
            parts = [artist['name']]
            for element in decomposed:
                if isinstance(element, dict) and element.get('name'):
                    parts.append(element['name'])
                elif isinstance(element, compat_str):
                    parts.append(element)
            return ''.join(parts)

        def extract_artist(artist_list):
            """Return a comma-separated string of artist names, or None."""
            if artist_list and isinstance(artist_list, list):
                artists_names = [
                    extract_artist_name(a) for a in artist_list
                    if isinstance(a, dict) and a.get('name')]
                if artists_names:
                    return ', '.join(artists_names)

        albums = track.get('albums')
        if albums and isinstance(albums, list):
            album = albums[0]
            if isinstance(album, dict):
                year = album.get('year')
                disc_number = int_or_none(try_get(
                    album, lambda x: x['trackPosition']['volume']))
                track_number = int_or_none(try_get(
                    album, lambda x: x['trackPosition']['index']))
                track_info.update({
                    'album': album.get('title'),
                    'album_artist': extract_artist(album.get('artists')),
                    'release_year': int_or_none(year),
                    'genre': album.get('genre'),
                    'disc_number': disc_number,
                    'track_number': track_number,
                })

        track_artist = extract_artist(track.get('artists'))
        if track_artist:
            track_info.update({
                'artist': track_artist,
                'title': '%s - %s' % (track_artist, track_title),
            })
        else:
            track_info['title'] = track_title

        return track_info
4c603938 192
4c603938 193
class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
    """Shared track-list handling for the album, playlist and artist extractors."""

    def _extract_tracks(self, source, item_id, url, tld):
        """Return the full track list for ``source``.

        ``source`` must carry ``tracks`` (track dicts) and ``trackIds``. When
        there are fewer track dicts than IDs, the missing ones are fetched
        through the ``track-entries`` endpoint and appended to
        ``source['tracks']`` in place.
        """
        tracks = source['tracks']
        track_ids = [compat_str(track_id) for track_id in source['trackIds']]

        # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
        # missing tracks should be retrieved manually.
        if len(tracks) < len(track_ids):
            present_track_ids = {
                compat_str(track['id'])
                for track in tracks if track.get('id')}
            missing_track_ids = [
                track_id for track_id in track_ids
                if track_id not in present_track_ids]
            # Request missing tracks in chunks to avoid exceeding max HTTP header size,
            # see https://github.com/ytdl-org/youtube-dl/issues/27355
            _TRACKS_PER_CHUNK = 250
            for chunk_num in itertools.count(0):
                start = chunk_num * _TRACKS_PER_CHUNK
                missing_track_ids_req = missing_track_ids[start:start + _TRACKS_PER_CHUNK]
                # An empty chunk means everything has been requested. It also
                # covers the case where the ID list is longer than the track
                # list but nothing is actually missing (e.g. duplicate IDs).
                if not missing_track_ids_req:
                    break
                missing_tracks = self._call_api(
                    'track-entries', tld, url, item_id,
                    'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), {
                        'entries': ','.join(missing_track_ids_req),
                        'lang': tld,
                        'external-domain': 'music.yandex.%s' % tld,
                        'overembed': 'false',
                        'strict': 'true',
                    })
                # The request is non-fatal; a failed chunk is skipped
                if missing_tracks:
                    tracks.extend(missing_tracks)

        return tracks

    def _build_playlist(self, tracks):
        """Turn track dicts into ``url_result`` entries for ``YandexMusicTrackIE``.

        Tracks without an ID, or without a usable first album carrying an ID,
        are skipped.
        """
        entries = []
        for track in tracks:
            track_id = track.get('id') or track.get('realId')
            if not track_id:
                continue
            albums = track.get('albums')
            if not albums or not isinstance(albums, list):
                continue
            album = albums[0]
            if not isinstance(album, dict):
                continue
            album_id = album.get('id')
            if not album_id:
                continue
            entries.append(self.url_result(
                'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id),
                ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
        return entries
e7c14660
S
251
252
class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
    """Extractor for a Yandex.Music album (``/album/<id>``), returned as a playlist."""

    IE_NAME = 'yandexmusic:album'
    IE_DESC = 'Яндекс.Музыка - Альбом'
    _VALID_URL = r'%s/album/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'http://music.yandex.ru/album/540508',
        'info_dict': {
            'id': '540508',
            'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
        },
        'playlist_count': 50,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        'url': 'https://music.yandex.ru/album/3840501',
        'info_dict': {
            'id': '3840501',
            'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
        },
        'playlist_count': 33,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        # empty artists
        'url': 'https://music.yandex.ru/album/9091882',
        'info_dict': {
            'id': '9091882',
            'title': 'ТЕД на русском',
        },
        'playlist_count': 187,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject any URL the track extractor accepts; otherwise defer to the parent."""
        if YandexMusicTrackIE.suitable(url):
            return False
        return super(YandexMusicAlbumIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist of all tracks across the album's volumes.

        The title is "<first artist> - <album title>" when an artist name is
        available, with " (<year>)" appended when the album has a year.
        """
        match = self._match_valid_url(url)
        tld, album_id = match.group('tld'), match.group('id')

        album = self._call_api(
            'album', tld, url, album_id, 'Downloading album JSON',
            {'album': album_id})

        # Flatten the per-volume track lists into one sequence
        all_tracks = []
        for volume in album['volumes']:
            all_tracks.extend(volume)
        entries = self._build_playlist(all_tracks)

        title = album['title']
        first_artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str)
        if first_artist:
            title = '%s - %s' % (first_artist, title)
        release_year = album.get('year')
        if release_year:
            title = title + ' (%s)' % release_year

        return self.playlist_result(entries, compat_str(album['id']), title)
4c603938 308
4c603938 309
class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
    """Extractor for a user playlist (``/users/<user>/playlists/<id>``)."""

    IE_NAME = 'yandexmusic:playlist'
    IE_DESC = 'Яндекс.Музыка - Плейлист'
    _VALID_URL = r'%s/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
        'info_dict': {
            'id': '1245',
            'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
            'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
        },
        'playlist_count': 5,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
        'only_matching': True,
    }, {
        # playlist exceeding the limit of 150 tracks (see
        # https://github.com/ytdl-org/youtube-dl/issues/6666)
        'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
        'info_dict': {
            'id': '1364',
            'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
        },
        'playlist_mincount': 437,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    def _real_extract(self, url):
        """Return the playlist's tracks with its title and description."""
        match = self._match_valid_url(url)
        tld, owner, playlist_id = match.group('tld'), match.group('user'), match.group('id')

        api_query = {
            'owner': owner,
            'kinds': playlist_id,
            'light': 'true',
            'lang': tld,
            'external-domain': 'music.yandex.%s' % tld,
            'overembed': 'false',
        }
        response = self._call_api(
            'playlist', tld, url, playlist_id, 'Downloading playlist JSON', api_query)
        playlist = response['playlist']

        # Completes the track list when the response carries only part of it
        tracks = self._extract_tracks(playlist, playlist_id, url, tld)
        entries = self._build_playlist(tracks)

        return self.playlist_result(
            entries, compat_str(playlist_id),
            playlist.get('title'), playlist.get('description'))
29f7c58a 361
362
class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
    """Base for artist extractors.

    ``_call_artist`` reads ``self._ARTIST_WHAT`` and ``self._ARTIST_SORT``,
    which are not defined on this class.
    """

    def _call_artist(self, tld, url, artist_id):
        """Query the ``artist`` endpoint for the section named by ``_ARTIST_WHAT``."""
        section = self._ARTIST_WHAT
        note = 'Downloading artist %s JSON' % section
        api_query = {
            'artist': artist_id,
            'what': section,
            'sort': self._ARTIST_SORT or '',
            'dir': '',
            'period': '',
            'lang': tld,
            'external-domain': 'music.yandex.%s' % tld,
            'overembed': 'false',
        }
        return self._call_api('artist', tld, url, artist_id, note, api_query)

    def _real_extract(self, url):
        """Return the artist's tracks as a playlist titled with the artist name."""
        match = self._match_valid_url(url)
        tld, artist_id = match.group('tld'), match.group('id')
        data = self._call_artist(tld, url, artist_id)
        entries = self._build_playlist(
            self._extract_tracks(data, artist_id, url, tld))
        # The title is None when the response has no string artist name
        artist_name = try_get(data, lambda x: x['artist']['name'], compat_str)
        return self.playlist_result(entries, artist_id, artist_name)
387
388
class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
    """Extractor for an artist's track list (``/artist/<id>/tracks``)."""

    IE_NAME = 'yandexmusic:artist:tracks'
    IE_DESC = 'Яндекс.Музыка - Артист - Треки'
    _VALID_URL = r'%s/artist/(?P<id>\d+)/tracks' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'https://music.yandex.ru/artist/617526/tracks',
        'info_dict': {
            'id': '617526',
            'title': 'md5:131aef29d45fd5a965ca613e708c040b',
        },
        'playlist_count': 507,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    _ARTIST_SORT = ''
    _ARTIST_WHAT = 'tracks'

    def _real_extract(self, url):
        """Return the artist's tracks as a playlist.

        The title is "<artist name> - Треки", using the artist ID when no
        name is available.
        """
        match = self._match_valid_url(url)
        tld, artist_id = match.group('tld'), match.group('id')
        data = self._call_artist(tld, url, artist_id)
        entries = self._build_playlist(
            self._extract_tracks(data, artist_id, url, tld))
        artist_name = try_get(data, lambda x: x['artist']['name'], compat_str)
        playlist_title = '%s - %s' % (artist_name or artist_id, 'Треки')
        return self.playlist_result(entries, artist_id, playlist_title)
417
418
class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
    """Extractor for an artist's album list (``/artist/<id>/albums``)."""

    IE_NAME = 'yandexmusic:artist:albums'
    IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
    _VALID_URL = r'%s/artist/(?P<id>\d+)/albums' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'https://music.yandex.ru/artist/617526/albums',
        'info_dict': {
            'id': '617526',
            'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
        },
        'playlist_count': 8,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    _ARTIST_SORT = 'year'
    _ARTIST_WHAT = 'albums'

    def _real_extract(self, url):
        """Return a playlist of album URLs handled by ``YandexMusicAlbumIE``.

        Non-dict album entries and albums without an ID are skipped. The
        title is "<artist name> - Альбомы", using the artist ID when no
        name is available.
        """
        match = self._match_valid_url(url)
        tld, artist_id = match.group('tld'), match.group('id')
        data = self._call_artist(tld, url, artist_id)

        entries = []
        for album in data['albums']:
            album_id = album.get('id') if isinstance(album, dict) else None
            if album_id:
                entries.append(self.url_result(
                    'http://music.yandex.ru/album/%s' % album_id,
                    ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))

        artist_name = try_get(data, lambda x: x['artist']['name'], compat_str)
        playlist_title = '%s - %s' % (artist_name or artist_id, 'Альбомы')
        return self.playlist_result(entries, artist_id, playlist_title)