]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/yandexmusic.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / yandexmusic.py
1 import hashlib
2 import itertools
3
4 from .common import InfoExtractor
5 from ..compat import compat_str
6 from ..utils import (
7 ExtractorError,
8 float_or_none,
9 int_or_none,
10 try_get,
11 )
12
13
class YandexMusicBaseIE(InfoExtractor):
    """Common base class for the Yandex Music extractors.

    Holds the shared URL prefix pattern, turns API error payloads and
    CAPTCHA pages into ``ExtractorError``, and provides ``_call_api`` for
    the ``/handlers/<endpoint>.jsx`` endpoints.
    """

    _VALID_URL_BASE = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by|com)'

    # Text shown on the anti-bot page ("We are very sorry, but the requests
    # coming from your IP address look automated."). The &nbsp; entities are
    # part of the literal text that is searched for.
    _CAPTCHA_MARKER = 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.'

    @staticmethod
    def _handle_error(response):
        """Raise ``ExtractorError`` if a JSON response reports a failure.

        Non-dict responses are ignored. A truthy ``error`` value is raised
        as-is; a response whose ``type`` is ``'captcha'`` or that has a
        ``captcha`` key triggers the CAPTCHA error.
        """
        if isinstance(response, dict):
            error = response.get('error')
            if error:
                raise ExtractorError(error, expected=True)
            if response.get('type') == 'captcha' or 'captcha' in response:
                YandexMusicBaseIE._raise_captcha()

    @staticmethod
    def _raise_captcha():
        """Raise the user-facing error explaining how to get past the CAPTCHA."""
        raise ExtractorError(
            'YandexMusic has considered yt-dlp requests automated and '
            'asks you to solve a CAPTCHA. You can either wait for some '
            'time until unblocked and optionally use --sleep-interval '
            'in future or alternatively you can go to https://music.yandex.ru/ '
            'solve CAPTCHA, then export cookies and pass cookie file to '
            'yt-dlp with --cookies',
            expected=True)

    def _download_webpage_handle(self, *args, **kwargs):
        """Download via the parent implementation and reject CAPTCHA pages.

        All arguments are forwarded unchanged and the parent's return value
        is passed through unchanged.

        Raises:
            ExtractorError: if the downloaded content contains the CAPTCHA
                marker text.
        """
        result = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
        # NOTE(review): the parent is understood to return a
        # (content, url handle) tuple, or False on a non-fatal failure --
        # confirm against InfoExtractor. Testing the marker with `in` on the
        # raw result would compare it to whole tuple elements (never a
        # substring match) and raise TypeError on False, so only the content
        # string is inspected here.
        content = result[0] if isinstance(result, tuple) and result else result
        if isinstance(content, str) and self._CAPTCHA_MARKER in content:
            self._raise_captcha()
        return result

    def _download_json(self, *args, **kwargs):
        """Download JSON via the parent implementation and check it for errors.

        Raises:
            ExtractorError: if ``_handle_error`` finds an error or CAPTCHA
                marker in the response.
        """
        response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
        self._handle_error(response)
        return response

    def _call_api(self, ep, tld, url, item_id, note, query):
        """Request ``https://music.yandex.<tld>/handlers/<ep>.jsx`` as JSON.

        Args:
            ep: handler name inserted into the URL path.
            tld: top-level domain of the Yandex Music site to query.
            url: page URL, sent as ``Referer`` and ``X-Retpath-Y``.
            item_id: identifier passed to ``_download_json``.
            note: progress note passed to ``_download_json``.
            query: query-string parameters.

        Returns:
            Whatever ``_download_json`` returns. The request is made with
            ``fatal=False``, so presumably a falsy value comes back when the
            download fails -- callers should verify before indexing.
        """
        return self._download_json(
            'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep),
            item_id, note,
            fatal=False,
            headers={
                'Referer': url,
                'X-Requested-With': 'XMLHttpRequest',
                'X-Retpath-Y': url,
            },
            query=query)
59
60
class YandexMusicTrackIE(YandexMusicBaseIE):
    """Extractor for a single Yandex Music track (``/album/<id>/track/<id>``)."""

    IE_NAME = 'yandexmusic:track'
    IE_DESC = 'Яндекс.Музыка - Трек'
    _VALID_URL = r'%s/album/(?P<album_id>\d+)/track/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'http://music.yandex.ru/album/540508/track/4878838',
        'md5': 'dec8b661f12027ceaba33318787fff76',
        'info_dict': {
            'id': '4878838',
            'ext': 'mp3',
            'title': 'md5:c63e19341fdbe84e43425a30bc777856',
            'filesize': int,
            'duration': 193.04,
            'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
            'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
            'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
            'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
            'release_year': 2009,
        },
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        # multiple disks
        'url': 'http://music.yandex.ru/album/3840501/track/705105',
        'md5': '82a54e9e787301dd45aba093cf6e58c0',
        'info_dict': {
            'id': '705105',
            'ext': 'mp3',
            'title': 'md5:f86d4a9188279860a83000277024c1a6',
            'filesize': int,
            'duration': 239.27,
            'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
            'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
            'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
            'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
            'release_year': 2016,
            'genre': 'pop',
            'disc_number': 2,
            'track_number': 9,
        },
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        'url': 'http://music.yandex.com/album/540508/track/4878838',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for one track.

        Fetches the track metadata, resolves the download location in two
        further JSON requests, derives the signed MP3 URL and then collects
        album and artist metadata.

        Returns:
            dict with ``id``, ``ext``, ``url``, ``title`` and the optional
            metadata fields filled in below.
        """
        mobj = self._match_valid_url(url)
        tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')

        track = self._call_api(
            'track', tld, url, track_id, 'Downloading track JSON',
            {'track': '%s:%s' % (track_id, album_id)})['track']
        track_title = track['title']

        # NOTE(review): this request always goes to the .ru host regardless
        # of the tld matched from the URL -- confirm that is intended.
        download_data = self._download_json(
            'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
            track_id, 'Downloading track location url JSON', query={'hq': 1}, headers={'X-Retpath-Y': url})

        fd_data = self._download_json(
            download_data['src'], track_id,
            'Downloading track location JSON',
            query={'format': 'json'})
        # The path component of the media URL starts with an MD5 over a fixed
        # salt, the path without its first character, and the 's' value.
        key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
        # NOTE(review): the format string ends with a space; kept as-is
        # because it is part of the emitted URL -- confirm whether intended.
        f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])

        # 'albums' may be absent, empty or malformed; try_get yields None in
        # all of those cases instead of raising.
        thumbnail = None
        cover_uri = try_get(track, lambda x: x['albums'][0]['coverUri'], compat_str)
        if cover_uri:
            # The literal '%%' in the cover URI is swapped for 'orig'.
            thumbnail = cover_uri.replace('%%', 'orig')
            if not thumbnail.startswith('http'):
                thumbnail = 'http://' + thumbnail

        track_info = {
            'id': track_id,
            'ext': 'mp3',
            'url': f_url,
            'filesize': int_or_none(track.get('fileSize')),
            'duration': float_or_none(track.get('durationMs'), 1000),
            'thumbnail': thumbnail,
            'track': track_title,
            'acodec': download_data.get('codec'),
            'abr': int_or_none(download_data.get('bitrate')),
        }

        def extract_artist_name(artist):
            """Return the artist name followed by its 'decomposed' parts, if any."""
            decomposed = artist.get('decomposed')
            if not isinstance(decomposed, list):
                return artist['name']
            parts = [artist['name']]
            for element in decomposed:
                # Elements are either dicts carrying a name or plain strings.
                if isinstance(element, dict) and element.get('name'):
                    parts.append(element['name'])
                elif isinstance(element, compat_str):
                    parts.append(element)
            return ''.join(parts)

        def extract_artist(artist_list):
            """Join the names of all named artists with ', '; None if there are none."""
            if artist_list and isinstance(artist_list, list):
                # Entries that are not dicts are skipped rather than crashing.
                artists_names = [
                    extract_artist_name(a) for a in artist_list
                    if isinstance(a, dict) and a.get('name')]
                if artists_names:
                    return ', '.join(artists_names)

        albums = track.get('albums')
        if albums and isinstance(albums, list):
            # Only the first album entry is used for metadata.
            album = albums[0]
            if isinstance(album, dict):
                year = album.get('year')
                disc_number = int_or_none(try_get(
                    album, lambda x: x['trackPosition']['volume']))
                track_number = int_or_none(try_get(
                    album, lambda x: x['trackPosition']['index']))
                track_info.update({
                    'album': album.get('title'),
                    'album_artist': extract_artist(album.get('artists')),
                    'release_year': int_or_none(year),
                    'genre': album.get('genre'),
                    'disc_number': disc_number,
                    'track_number': track_number,
                })

        # The title is prefixed with the artist(s) when they are known.
        track_artist = extract_artist(track.get('artists'))
        if track_artist:
            track_info.update({
                'artist': track_artist,
                'title': '%s - %s' % (track_artist, track_title),
            })
        else:
            track_info['title'] = track_title

        return track_info
192
193
class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
    """Base class for extractors that return a list of Yandex Music tracks."""

    def _extract_tracks(self, source, item_id, url, tld):
        """Return the track list of ``source``, fetching entries it lacks.

        Args:
            source: dict with a ``tracks`` list and a ``trackIds`` list.
            item_id: identifier used for the download notes.
            url: page URL forwarded to ``_call_api``.
            tld: top-level domain of the site being queried.

        Returns:
            ``source['tracks']``, extended in place with any tracks that had
            to be requested separately.
        """
        tracks = source['tracks']
        track_ids = [compat_str(track_id) for track_id in source['trackIds']]

        # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
        # missing tracks should be retrieved manually.
        if len(tracks) < len(track_ids):
            present_track_ids = {
                compat_str(track['id'])
                for track in tracks if track.get('id')}
            missing_track_ids = [
                track_id for track_id in track_ids
                if track_id not in present_track_ids]
            # Request missing tracks in chunks to avoid exceeding max HTTP header size,
            # see https://github.com/ytdl-org/youtube-dl/issues/27355
            _TRACKS_PER_CHUNK = 250
            # Iterating over the chunk start offsets issues no request at all
            # when nothing is actually missing (e.g. duplicate ids in
            # trackIds), where an assert on a non-empty chunk would abort.
            chunk_starts = range(0, len(missing_track_ids), _TRACKS_PER_CHUNK)
            for chunk_num, start in enumerate(chunk_starts, 1):
                missing_track_ids_req = missing_track_ids[start:start + _TRACKS_PER_CHUNK]
                missing_tracks = self._call_api(
                    'track-entries', tld, url, item_id,
                    'Downloading missing tracks JSON chunk %d' % chunk_num, {
                        'entries': ','.join(missing_track_ids_req),
                        'lang': tld,
                        'external-domain': 'music.yandex.%s' % tld,
                        'overembed': 'false',
                        'strict': 'true',
                    })
                # A failed (falsy) chunk is skipped instead of aborting.
                if missing_tracks:
                    tracks.extend(missing_tracks)

        return tracks

    def _build_playlist(self, tracks):
        """Turn track dicts into ``url_result`` entries for the track extractor.

        Tracks without an id, or without a usable first album carrying an id,
        are skipped because the track URL needs both.
        """
        entries = []
        for track in tracks:
            track_id = track.get('id') or track.get('realId')
            if not track_id:
                continue
            albums = track.get('albums')
            if not albums or not isinstance(albums, list):
                continue
            album = albums[0]
            if not isinstance(album, dict):
                continue
            album_id = album.get('id')
            if not album_id:
                continue
            # NOTE(review): entries always point at the .ru host, whatever
            # tld the playlist was requested from -- confirm intended.
            entries.append(self.url_result(
                'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id),
                ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
        return entries
251
252
class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
    """Extractor that exposes a Yandex Music album as a playlist of its tracks."""

    IE_NAME = 'yandexmusic:album'
    IE_DESC = 'Яндекс.Музыка - Альбом'
    _VALID_URL = r'%s/album/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'http://music.yandex.ru/album/540508',
        'info_dict': {
            'id': '540508',
            'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
        },
        'playlist_count': 50,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        'url': 'https://music.yandex.ru/album/3840501',
        'info_dict': {
            'id': '3840501',
            'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
        },
        'playlist_count': 33,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        # empty artists
        'url': 'https://music.yandex.ru/album/9091882',
        'info_dict': {
            'id': '9091882',
            'title': 'ТЕД на русском',
        },
        'playlist_count': 187,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the track extractor accepts; otherwise defer to the parent."""
        # Track URLs start with the same /album/<id> prefix as album URLs,
        # so the track extractor gets first refusal.
        if YandexMusicTrackIE.suitable(url):
            return False
        return super(YandexMusicAlbumIE, cls).suitable(url)

    def _real_extract(self, url):
        """Fetch the album JSON and return a playlist of all its tracks.

        The playlist title is the album title, prefixed with the first
        artist's name and suffixed with the year when those are available.
        """
        match = self._match_valid_url(url)
        tld = match.group('tld')
        album_id = match.group('id')

        album = self._call_api(
            'album', tld, url, album_id, 'Downloading album JSON',
            {'album': album_id})

        # Each volume is a list of tracks; flatten them into one sequence.
        all_tracks = []
        for volume in album['volumes']:
            all_tracks.extend(volume)
        entries = self._build_playlist(all_tracks)

        playlist_title = album['title']
        first_artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str)
        if first_artist:
            playlist_title = '%s - %s' % (first_artist, playlist_title)
        release_year = album.get('year')
        if release_year:
            playlist_title += ' (%s)' % release_year

        return self.playlist_result(entries, compat_str(album['id']), playlist_title)
308
309
class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
    """Extractor for a user's playlist (``/users/<user>/playlists/<id>``)."""

    IE_NAME = 'yandexmusic:playlist'
    IE_DESC = 'Яндекс.Музыка - Плейлист'
    _VALID_URL = r'%s/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
        'info_dict': {
            'id': '1245',
            'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
            'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
        },
        'playlist_count': 5,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
        'only_matching': True,
    }, {
        # playlist exceeding the limit of 150 tracks (see
        # https://github.com/ytdl-org/youtube-dl/issues/6666)
        'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
        'info_dict': {
            'id': '1364',
            'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
        },
        'playlist_mincount': 437,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    def _real_extract(self, url):
        """Fetch the playlist JSON and return its tracks as a playlist result.

        The result carries the playlist id from the URL plus the title and
        description reported by the API, when present.
        """
        match = self._match_valid_url(url)
        tld = match.group('tld')
        owner = match.group('user')
        playlist_id = match.group('id')

        api_query = {
            'owner': owner,
            'kinds': playlist_id,
            'light': 'true',
            'lang': tld,
            'external-domain': 'music.yandex.%s' % tld,
            'overembed': 'false',
        }
        response = self._call_api(
            'playlist', tld, url, playlist_id, 'Downloading playlist JSON', api_query)
        playlist = response['playlist']

        # The API response may omit some tracks; _extract_tracks fetches them.
        tracks = self._extract_tracks(playlist, playlist_id, url, tld)
        entries = self._build_playlist(tracks)

        return self.playlist_result(
            entries,
            compat_str(playlist_id),
            playlist.get('title'), playlist.get('description'))
361
362
class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
    """Base class for the artist extractors.

    ``_call_artist`` reads ``self._ARTIST_WHAT`` and ``self._ARTIST_SORT``,
    which this class does not define itself.
    """

    def _call_artist(self, tld, url, artist_id):
        """Query the ``artist`` handler for the section named by ``_ARTIST_WHAT``."""
        section = self._ARTIST_WHAT
        api_query = {
            'artist': artist_id,
            'what': section,
            'sort': self._ARTIST_SORT or '',
            'dir': '',
            'period': '',
            'lang': tld,
            'external-domain': 'music.yandex.%s' % tld,
            'overembed': 'false',
        }
        return self._call_api(
            'artist', tld, url, artist_id,
            'Downloading artist %s JSON' % section, api_query)

    def _real_extract(self, url):
        """Return the artist's tracks as a playlist titled with the artist's name."""
        match = self._match_valid_url(url)
        tld = match.group('tld')
        artist_id = match.group('id')

        artist_data = self._call_artist(tld, url, artist_id)
        track_list = self._extract_tracks(artist_data, artist_id, url, tld)
        # The title is None when the response carries no string artist name.
        artist_name = try_get(artist_data, lambda x: x['artist']['name'], compat_str)

        entries = self._build_playlist(track_list)
        return self.playlist_result(entries, artist_id, artist_name)
387
388
class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
    """Extractor for an artist's track listing (``/artist/<id>/tracks``)."""

    IE_NAME = 'yandexmusic:artist:tracks'
    IE_DESC = 'Яндекс.Музыка - Артист - Треки'
    _VALID_URL = r'%s/artist/(?P<id>\d+)/tracks' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'https://music.yandex.ru/artist/617526/tracks',
        'info_dict': {
            'id': '617526',
            'title': 'md5:131aef29d45fd5a965ca613e708c040b',
        },
        'playlist_count': 507,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    _ARTIST_SORT = ''
    _ARTIST_WHAT = 'tracks'

    def _real_extract(self, url):
        """Return the artist's tracks as a playlist titled '<artist> - Треки'.

        The artist id stands in for the name when the API gives no name.
        """
        match = self._match_valid_url(url)
        tld = match.group('tld')
        artist_id = match.group('id')

        artist_data = self._call_artist(tld, url, artist_id)
        track_list = self._extract_tracks(artist_data, artist_id, url, tld)

        artist_name = try_get(artist_data, lambda x: x['artist']['name'], compat_str)
        display_name = artist_name or artist_id
        playlist_title = '%s - %s' % (display_name, 'Треки')

        entries = self._build_playlist(track_list)
        return self.playlist_result(entries, artist_id, playlist_title)
417
418
class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
    """Extractor for an artist's album listing (``/artist/<id>/albums``)."""

    IE_NAME = 'yandexmusic:artist:albums'
    IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
    _VALID_URL = r'%s/artist/(?P<id>\d+)/albums' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'https://music.yandex.ru/artist/617526/albums',
        'info_dict': {
            'id': '617526',
            'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
        },
        'playlist_count': 8,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    _ARTIST_SORT = 'year'
    _ARTIST_WHAT = 'albums'

    def _real_extract(self, url):
        """Return the artist's albums as a playlist titled '<artist> - Альбомы'.

        Each entry is a ``url_result`` handed to the album extractor. Entries
        that are not dicts or have no id are skipped.
        """
        match = self._match_valid_url(url)
        tld = match.group('tld')
        artist_id = match.group('id')

        artist_data = self._call_artist(tld, url, artist_id)

        album_ids = [
            item.get('id') for item in artist_data['albums']
            if isinstance(item, dict)]
        entries = [
            self.url_result(
                'http://music.yandex.ru/album/%s' % album_id,
                ie=YandexMusicAlbumIE.ie_key(), video_id=album_id)
            for album_id in album_ids if album_id]

        artist_name = try_get(artist_data, lambda x: x['artist']['name'], compat_str)
        playlist_title = '%s - %s' % (artist_name or artist_id, 'Альбомы')
        return self.playlist_result(entries, artist_id, playlist_title)
452 artist = try_get(data, lambda x: x['artist']['name'], compat_str)
453 title = '%s - %s' % (artist or artist_id, 'Альбомы')
454 return self.playlist_result(entries, artist_id, title)