]>
Commit | Line | Data |
---|---|---|
e7c14660 | 1 | # coding: utf-8 |
4c603938 MA |
2 | from __future__ import unicode_literals |
3 | ||
4c603938 | 4 | import hashlib |
bc2ca1bb | 5 | import itertools |
6 | import re | |
4c603938 MA |
7 | |
8 | from .common import InfoExtractor | |
6e6bc8da | 9 | from ..compat import compat_str |
47fe42e1 | 10 | from ..utils import ( |
ae7d31af | 11 | ExtractorError, |
47fe42e1 S |
12 | int_or_none, |
13 | float_or_none, | |
0250161c | 14 | try_get, |
47fe42e1 | 15 | ) |
4c603938 | 16 | |
4c603938 | 17 | |
class YandexMusicBaseIE(InfoExtractor):
    """Common base for the Yandex.Music extractors.

    Wraps the download helpers so that error payloads and CAPTCHA pages
    are turned into an ExtractorError, and provides _call_api for the
    ``handlers/<endpoint>.jsx`` endpoints.
    """
    _VALID_URL_BASE = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by|com)'
    # Text shown on the anti-bot page served instead of the requested content.
    _CAPTCHA_MARKER = 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.'

    @staticmethod
    def _handle_error(response):
        """Raise ExtractorError if a JSON response carries an error or a captcha.

        Non-dict responses are ignored.
        """
        if isinstance(response, dict):
            error = response.get('error')
            if error:
                raise ExtractorError(error, expected=True)
            if response.get('type') == 'captcha' or 'captcha' in response:
                YandexMusicBaseIE._raise_captcha()

    @staticmethod
    def _raise_captcha():
        """Raise an expected ExtractorError explaining the CAPTCHA block."""
        raise ExtractorError(
            'YandexMusic has considered yt-dlp requests automated and '
            'asks you to solve a CAPTCHA. You can either wait for some '
            'time until unblocked and optionally use --sleep-interval '
            'in future or alternatively you can go to https://music.yandex.ru/ '
            'solve CAPTCHA, then export cookies and pass cookie file to '
            'yt-dlp with --cookies',
            expected=True)

    def _download_webpage_handle(self, *args, **kwargs):
        """Download a page and fail early if the CAPTCHA page was served.

        The parent's return value is passed through unchanged.
        """
        res = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
        # NOTE(review): per InfoExtractor's contract the parent returns a
        # (content, url handle) pair, or a falsy value when a non-fatal
        # request fails. Testing the marker with ``in`` against the pair
        # itself never matches, and against False it raises TypeError.
        if not res:
            return res
        content = res[0] if isinstance(res, (tuple, list)) else res
        if self._CAPTCHA_MARKER in content:
            self._raise_captcha()
        return res

    def _download_json(self, *args, **kwargs):
        """Download JSON and run it through _handle_error before returning it."""
        response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
        self._handle_error(response)
        return response

    def _call_api(self, ep, tld, url, item_id, note, query):
        """Query ``https://music.yandex.<tld>/handlers/<ep>.jsx``.

        The request is made with fatal=False, so the result is presumably
        falsy when the download fails - callers should be prepared for that.
        """
        return self._download_json(
            'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep),
            item_id, note,
            fatal=False,
            headers={
                'Referer': url,
                'X-Requested-With': 'XMLHttpRequest',
                'X-Retpath-Y': url,
            },
            query=query)
63 | ||
ae7d31af S |
64 | |
class YandexMusicTrackIE(YandexMusicBaseIE):
    """Extractor for a single Yandex.Music track (``/album/<id>/track/<id>``)."""
    IE_NAME = 'yandexmusic:track'
    IE_DESC = 'Яндекс.Музыка - Трек'
    _VALID_URL = r'%s/album/(?P<album_id>\d+)/track/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'http://music.yandex.ru/album/540508/track/4878838',
        'md5': 'dec8b661f12027ceaba33318787fff76',
        'info_dict': {
            'id': '4878838',
            'ext': 'mp3',
            'title': 'md5:c63e19341fdbe84e43425a30bc777856',
            'filesize': int,
            'duration': 193.04,
            'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
            'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
            'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
            'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
            'release_year': 2009,
        },
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        # multiple disks
        'url': 'http://music.yandex.ru/album/3840501/track/705105',
        'md5': '82a54e9e787301dd45aba093cf6e58c0',
        'info_dict': {
            'id': '705105',
            'ext': 'mp3',
            'title': 'md5:f86d4a9188279860a83000277024c1a6',
            'filesize': int,
            'duration': 239.27,
            'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
            'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
            'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
            'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
            'release_year': 2016,
            'genre': 'pop',
            'disc_number': 2,
            'track_number': 9,
        },
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        'url': 'http://music.yandex.com/album/540508/track/4878838',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the track's mp3 URL and collect its metadata."""
        mobj = re.match(self._VALID_URL, url)
        tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')

        track = self._call_api(
            'track', tld, url, track_id, 'Downloading track JSON',
            {'track': '%s:%s' % (track_id, album_id)})['track']
        track_title = track['title']

        # Step 1: ask for the location descriptor of the media file.
        download_data = self._download_json(
            'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
            track_id, 'Downloading track location url JSON',
            headers={'X-Retpath-Y': url})

        # Step 2: fetch the descriptor; the final URL is signed with an md5
        # of a fixed salt, the path (minus its first character) and 's'.
        fd_data = self._download_json(
            download_data['src'], track_id,
            'Downloading track location JSON',
            query={'format': 'json'})
        key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
        # No trailing whitespace here: it would become part of the query value.
        f_url = 'http://%s/get-mp3/%s/%s?track-id=%s' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])

        thumbnail = None
        # try_get tolerates a missing, empty or malformed 'albums' list,
        # where plain indexing would raise.
        cover_uri = try_get(track, lambda x: x['albums'][0]['coverUri'], compat_str)
        if cover_uri:
            # '%%' is the size placeholder in the cover URI template.
            thumbnail = cover_uri.replace('%%', 'orig')
            if not thumbnail.startswith('http'):
                thumbnail = 'http://' + thumbnail

        track_info = {
            'id': track_id,
            'ext': 'mp3',
            'url': f_url,
            'filesize': int_or_none(track.get('fileSize')),
            'duration': float_or_none(track.get('durationMs'), 1000),
            'thumbnail': thumbnail,
            'track': track_title,
            'acodec': download_data.get('codec'),
            'abr': int_or_none(download_data.get('bitrate')),
        }

        def extract_artist_name(artist):
            # 'decomposed' interleaves separator strings and artist dicts
            # that follow the primary name; concatenate them in order.
            decomposed = artist.get('decomposed')
            if not isinstance(decomposed, list):
                return artist['name']
            parts = [artist['name']]
            for element in decomposed:
                if isinstance(element, dict) and element.get('name'):
                    parts.append(element['name'])
                elif isinstance(element, compat_str):
                    parts.append(element)
            return ''.join(parts)

        def extract_artist(artist_list):
            # Returns a comma separated string, or None when no artist has a name.
            if artist_list and isinstance(artist_list, list):
                artists_names = [
                    extract_artist_name(a) for a in artist_list
                    if isinstance(a, dict) and a.get('name')]
                if artists_names:
                    return ', '.join(artists_names)

        albums = track.get('albums')
        if albums and isinstance(albums, list):
            album = albums[0]
            if isinstance(album, dict):
                year = album.get('year')
                disc_number = int_or_none(try_get(
                    album, lambda x: x['trackPosition']['volume']))
                track_number = int_or_none(try_get(
                    album, lambda x: x['trackPosition']['index']))
                track_info.update({
                    'album': album.get('title'),
                    'album_artist': extract_artist(album.get('artists')),
                    'release_year': int_or_none(year),
                    'genre': album.get('genre'),
                    'disc_number': disc_number,
                    'track_number': track_number,
                })

        track_artist = extract_artist(track.get('artists'))
        if track_artist:
            track_info.update({
                'artist': track_artist,
                'title': '%s - %s' % (track_artist, track_title),
            })
        else:
            track_info['title'] = track_title

        return track_info
4c603938 | 197 | |
4c603938 | 198 | |
class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
    """Helpers shared by the album, playlist and artist extractors."""
    # Missing tracks are requested in chunks to avoid exceeding max HTTP header size,
    # see https://github.com/ytdl-org/youtube-dl/issues/27355
    _TRACKS_PER_CHUNK = 250

    def _extract_tracks(self, source, item_id, url, tld):
        """Return the track list of *source*, fetching entries it omits.

        *source* must provide 'tracks' and 'trackIds'. The list stored
        under 'tracks' is extended in place and returned.
        """
        tracks = source['tracks']
        track_ids = [compat_str(track_id) for track_id in source['trackIds']]

        # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
        # missing tracks should be retrieved manually.
        if len(tracks) < len(track_ids):
            present_track_ids = {
                compat_str(track['id'])
                for track in tracks if track.get('id')}
            missing_track_ids = [
                track_id for track_id in track_ids
                if track_id not in present_track_ids]
            for chunk_num in itertools.count(0):
                start = chunk_num * self._TRACKS_PER_CHUNK
                end = start + self._TRACKS_PER_CHUNK
                missing_track_ids_req = missing_track_ids[start:end]
                # Nothing (left) to request, e.g. when 'trackIds' contains
                # duplicates. An explicit check is used instead of assert,
                # which is stripped under -O and would otherwise crash.
                if not missing_track_ids_req:
                    break
                missing_tracks = self._call_api(
                    'track-entries', tld, url, item_id,
                    'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), {
                        'entries': ','.join(missing_track_ids_req),
                        'lang': tld,
                        'external-domain': 'music.yandex.%s' % tld,
                        'overembed': 'false',
                        'strict': 'true',
                    })
                if missing_tracks:
                    tracks.extend(missing_tracks)
                if end >= len(missing_track_ids):
                    break

        return tracks

    def _build_playlist(self, tracks):
        """Turn track dicts into url_result entries for YandexMusicTrackIE.

        Tracks without an id or without a usable first album are skipped.
        """
        entries = []
        for track in tracks:
            track_id = track.get('id') or track.get('realId')
            if not track_id:
                continue
            albums = track.get('albums')
            if not albums or not isinstance(albums, list):
                continue
            album = albums[0]
            if not isinstance(album, dict):
                continue
            album_id = album.get('id')
            if not album_id:
                continue
            entries.append(self.url_result(
                'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id),
                ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
        return entries
e7c14660 S |
256 | |
257 | ||
class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
    """Extractor for a whole Yandex.Music album (``/album/<id>``)."""
    IE_NAME = 'yandexmusic:album'
    IE_DESC = 'Яндекс.Музыка - Альбом'
    _VALID_URL = r'%s/album/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'http://music.yandex.ru/album/540508',
        'info_dict': {
            'id': '540508',
            'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
        },
        'playlist_count': 50,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        'url': 'https://music.yandex.ru/album/3840501',
        'info_dict': {
            'id': '3840501',
            'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
        },
        'playlist_count': 33,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        # empty artists
        'url': 'https://music.yandex.ru/album/9091882',
        'info_dict': {
            'id': '9091882',
            'title': 'ТЕД на русском',
        },
        'playlist_count': 187,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs the track extractor accepts; otherwise defer to the parent."""
        if YandexMusicTrackIE.suitable(url):
            return False
        return super(YandexMusicAlbumIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build a playlist from all tracks of every volume of the album."""
        match = re.match(self._VALID_URL, url)
        tld, album_id = match.group('tld'), match.group('id')

        album = self._call_api(
            'album', tld, url, album_id, 'Downloading album JSON',
            {'album': album_id})

        # Flatten the per-volume track lists into a single sequence.
        all_tracks = []
        for volume in album['volumes']:
            all_tracks.extend(volume)
        entries = self._build_playlist(all_tracks)

        # Title layout: "<artist> - <title> (<year>)", with the artist and
        # year parts present only when the API supplies them.
        title = album['title']
        artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str)
        if artist:
            title = '%s - %s' % (artist, title)
        year = album.get('year')
        if year:
            title = title + ' (%s)' % year

        playlist_id = compat_str(album['id'])
        return self.playlist_result(entries, playlist_id, title)
4c603938 | 313 | |
4c603938 | 314 | |
class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
    """Extractor for a user playlist (``/users/<user>/playlists/<id>``)."""
    IE_NAME = 'yandexmusic:playlist'
    IE_DESC = 'Яндекс.Музыка - Плейлист'
    _VALID_URL = r'%s/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
        'info_dict': {
            'id': '1245',
            'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
            'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
        },
        'playlist_count': 5,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
        'only_matching': True,
    }, {
        # playlist exceeding the limit of 150 tracks (see
        # https://github.com/ytdl-org/youtube-dl/issues/6666)
        'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
        'info_dict': {
            'id': '1364',
            'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
        },
        'playlist_mincount': 437,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    def _real_extract(self, url):
        """Fetch the playlist JSON and return its tracks as a playlist result."""
        match = re.match(self._VALID_URL, url)
        tld, user, playlist_id = (
            match.group('tld'), match.group('user'), match.group('id'))

        query = {
            'owner': user,
            'kinds': playlist_id,
            'light': 'true',
            'lang': tld,
            'external-domain': 'music.yandex.%s' % tld,
            'overembed': 'false',
        }
        response = self._call_api(
            'playlist', tld, url, playlist_id, 'Downloading playlist JSON', query)
        playlist = response['playlist']

        # _extract_tracks also fetches the tracks the playlist JSON omits.
        tracks = self._extract_tracks(playlist, playlist_id, url, tld)
        entries = self._build_playlist(tracks)

        return self.playlist_result(
            entries, compat_str(playlist_id),
            playlist.get('title'), playlist.get('description'))
29f7c58a | 366 | |
367 | ||
class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
    """Base for the artist extractors.

    Subclasses supply _VALID_URL, _ARTIST_WHAT and _ARTIST_SORT, which
    are read here.
    """

    def _call_artist(self, tld, url, artist_id):
        """Query the 'artist' handler for the section named by _ARTIST_WHAT."""
        what = self._ARTIST_WHAT
        note = 'Downloading artist %s JSON' % what
        query = {
            'artist': artist_id,
            'what': what,
            'sort': self._ARTIST_SORT or '',
            'dir': '',
            'period': '',
            'lang': tld,
            'external-domain': 'music.yandex.%s' % tld,
            'overembed': 'false',
        }
        return self._call_api('artist', tld, url, artist_id, note, query)

    def _real_extract(self, url):
        """Return the artist's tracks as a playlist titled with the artist name."""
        match = re.match(self._VALID_URL, url)
        tld, artist_id = match.group('tld'), match.group('id')

        data = self._call_artist(tld, url, artist_id)
        tracks = self._extract_tracks(data, artist_id, url, tld)
        # The title is None when the artist name is absent or not a string.
        title = try_get(data, lambda x: x['artist']['name'], compat_str)
        entries = self._build_playlist(tracks)
        return self.playlist_result(entries, artist_id, title)
392 | ||
393 | ||
class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
    """Extractor for an artist's track listing (``/artist/<id>/tracks``)."""
    IE_NAME = 'yandexmusic:artist:tracks'
    IE_DESC = 'Яндекс.Музыка - Артист - Треки'
    _VALID_URL = r'%s/artist/(?P<id>\d+)/tracks' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'https://music.yandex.ru/artist/617526/tracks',
        'info_dict': {
            'id': '617526',
            'title': 'md5:131aef29d45fd5a965ca613e708c040b',
        },
        'playlist_count': 507,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    _ARTIST_SORT = ''
    _ARTIST_WHAT = 'tracks'

    def _real_extract(self, url):
        """Return all tracks of the artist as a playlist."""
        match = re.match(self._VALID_URL, url)
        tld, artist_id = match.group('tld'), match.group('id')

        data = self._call_artist(tld, url, artist_id)
        tracks = self._extract_tracks(data, artist_id, url, tld)

        # Fall back to the numeric id when no artist name is available.
        artist = try_get(data, lambda x: x['artist']['name'], compat_str)
        title = '%s - %s' % (artist or artist_id, 'Треки')

        entries = self._build_playlist(tracks)
        return self.playlist_result(entries, artist_id, title)
422 | ||
423 | ||
class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
    """Extractor for an artist's album listing (``/artist/<id>/albums``)."""
    IE_NAME = 'yandexmusic:artist:albums'
    IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
    _VALID_URL = r'%s/artist/(?P<id>\d+)/albums' % YandexMusicBaseIE._VALID_URL_BASE

    _TESTS = [{
        'url': 'https://music.yandex.ru/artist/617526/albums',
        'info_dict': {
            'id': '617526',
            'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
        },
        'playlist_count': 8,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    _ARTIST_SORT = 'year'
    _ARTIST_WHAT = 'albums'

    def _real_extract(self, url):
        """Return one YandexMusicAlbumIE entry per album of the artist."""
        match = re.match(self._VALID_URL, url)
        tld, artist_id = match.group('tld'), match.group('id')

        data = self._call_artist(tld, url, artist_id)

        # Non-dict items and albums without an id are dropped.
        album_ids = [
            item.get('id') for item in data['albums']
            if isinstance(item, dict)]
        entries = [
            self.url_result(
                'http://music.yandex.ru/album/%s' % album_id,
                ie=YandexMusicAlbumIE.ie_key(), video_id=album_id)
            for album_id in album_ids if album_id]

        # Fall back to the numeric id when no artist name is available.
        artist = try_get(data, lambda x: x['artist']['name'], compat_str)
        title = '%s - %s' % (artist or artist_id, 'Альбомы')
        return self.playlist_result(entries, artist_id, title)