]>
Commit | Line | Data |
---|---|---|
e7c14660 | 1 | # coding: utf-8 |
4c603938 MA |
2 | from __future__ import unicode_literals |
3 | ||
4 | import re | |
5 | import hashlib | |
4c603938 MA |
6 | |
7 | from .common import InfoExtractor | |
6e6bc8da | 8 | from ..compat import compat_str |
47fe42e1 | 9 | from ..utils import ( |
ae7d31af | 10 | ExtractorError, |
47fe42e1 S |
11 | int_or_none, |
12 | float_or_none, | |
5c2266df | 13 | sanitized_Request, |
6e6bc8da | 14 | urlencode_postdata, |
47fe42e1 | 15 | ) |
4c603938 | 16 | |
4c603938 | 17 | |
ae7d31af S |
class YandexMusicBaseIE(InfoExtractor):
    """Common download wrappers for the Yandex.Music extractors.

    The page downloader is wrapped to detect the anti-bot CAPTCHA page, and
    the JSON downloader is wrapped to turn an ``error`` field in the response
    into an expected ExtractorError.
    """

    @staticmethod
    def _handle_error(response):
        """Raise ExtractorError when *response* is a dict with a truthy 'error'.

        Non-dict responses (e.g. lists) are accepted without inspection.
        """
        if isinstance(response, dict):
            error = response.get('error')
            if error:
                raise ExtractorError(error, expected=True)

    def _download_webpage(self, *args, **kwargs):
        """Download a page and fail with a readable message on the CAPTCHA page.

        All arguments are forwarded unchanged to the parent implementation and
        its result is returned unchanged.
        """
        webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
        # NOTE(review): callers in this file pass fatal=False; in that mode the
        # parent presumably returns a falsy non-string on failure (confirm
        # against InfoExtractor). Only run the substring test on real content
        # so a soft failure is not turned into a TypeError.
        if webpage and 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage:
            raise ExtractorError(
                'YandexMusic has considered youtube-dl requests automated and '
                'asks you to solve a CAPTCHA. You can either wait for some '
                'time until unblocked and optionally use --sleep-interval '
                'in future or alternatively you can go to https://music.yandex.ru/ '
                'solve CAPTCHA, then export cookies and pass cookie file to '
                'youtube-dl with --cookies',
                expected=True)
        return webpage

    def _download_json(self, *args, **kwargs):
        """Download JSON via the parent and reject responses carrying 'error'."""
        response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
        self._handle_error(response)
        return response
43 | ||
44 | ||
class YandexMusicTrackIE(YandexMusicBaseIE):
    """Extractor for a single Yandex.Music track page (/album/<id>/track/<id>)."""

    IE_NAME = 'yandexmusic:track'
    IE_DESC = 'Яндекс.Музыка - Трек'
    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'

    _TEST = {
        'url': 'http://music.yandex.ru/album/540508/track/4878838',
        'md5': 'f496818aa2f60b6c0062980d2e00dc20',
        'info_dict': {
            'id': '4878838',
            'ext': 'mp3',
            'title': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio - Gypsy Eyes 1',
            'filesize': 4628061,
            'duration': 193.04,
            'track': 'Gypsy Eyes 1',
            'album': 'Gypsy Soul',
            'album_artist': 'Carlo Ambrosio',
            'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio',
            'release_year': '2009',
        },
        'skip': 'Travis CI servers blocked by YandexMusic',
    }

    def _get_track_url(self, storage_dir, track_id):
        """Return the mp3 URL for a track.

        Downloads the track location JSON for *storage_dir* and assembles the
        URL from its 'host', 'ts' and 'path' fields plus an md5 key computed
        over a fixed salt, the path without its first character, and 's'.
        """
        data = self._download_json(
            'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
            % storage_dir,
            track_id, 'Downloading track location JSON')

        key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']).encode('utf-8')).hexdigest()
        # The second dot-separated component of storage_dir is sent as track-id.
        storage = storage_dir.split('.')

        return ('http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default'
                % (data['host'], key, data['ts'] + data['path'], storage[1]))

    def _get_track_info(self, track):
        """Build the info dict for one track dict as returned by the site.

        'title', 'id' and 'storageDir' are required keys of *track*; album,
        artist, cover and size/duration data are optional and skipped when
        absent or of an unexpected shape.
        """
        # Resolve the first album once; it feeds both the thumbnail and the
        # album metadata. A missing, empty or non-list 'albums' value (or a
        # non-dict first element) simply yields no album information instead
        # of raising on the [0] lookup.
        album = None
        albums = track.get('albums')
        if albums and isinstance(albums, list) and isinstance(albums[0], dict):
            album = albums[0]

        thumbnail = None
        cover_uri = album.get('coverUri') if album is not None else None
        if cover_uri:
            thumbnail = cover_uri.replace('%%', 'orig')
            if not thumbnail.startswith('http'):
                thumbnail = 'http://' + thumbnail

        track_title = track['title']
        track_info = {
            'id': track['id'],
            'ext': 'mp3',
            'url': self._get_track_url(track['storageDir'], track['id']),
            'filesize': int_or_none(track.get('fileSize')),
            # presumably converts milliseconds to seconds -- confirm against
            # float_or_none's second argument
            'duration': float_or_none(track.get('durationMs'), 1000),
            'thumbnail': thumbnail,
            'track': track_title,
        }

        def extract_artist(artist_list):
            # Join the non-empty artist names with ', '; returns None when
            # there is nothing usable.
            if artist_list and isinstance(artist_list, list):
                artists_names = [a['name'] for a in artist_list if a.get('name')]
                if artists_names:
                    return ', '.join(artists_names)

        if album is not None:
            year = album.get('year')
            track_info.update({
                'album': album.get('title'),
                'album_artist': extract_artist(album.get('artists')),
                'release_year': compat_str(year) if year else None,
            })

        track_artist = extract_artist(track.get('artists'))
        if track_artist:
            track_info.update({
                'artist': track_artist,
                'title': '%s - %s' % (track_artist, track_title),
            })
        else:
            track_info['title'] = track_title
        return track_info

    def _real_extract(self, url):
        """Fetch the track JSON for *url* and convert it to an info dict."""
        mobj = re.match(self._VALID_URL, url)
        album_id, track_id = mobj.group('album_id'), mobj.group('id')

        track = self._download_json(
            'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
            track_id, 'Downloading track JSON')['track']

        return self._get_track_info(track)
4c603938 | 135 | |
4c603938 | 136 | |
class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
    """Base for extractors that expose a list of track dicts as a playlist."""

    def _build_playlist(self, tracks):
        """Return url_result entries for every track that has an album list.

        Tracks whose 'albums' value is missing, empty or not a list are
        skipped; the first album's id is used to build the track page URL.
        """
        entries = []
        for item in tracks:
            item_albums = item.get('albums')
            if not item_albums or not isinstance(item_albums, list):
                continue
            track_url = 'http://music.yandex.ru/album/%s/track/%s' % (
                item_albums[0]['id'], item['id'])
            entries.append(self.url_result(track_url))
        return entries
e7c14660 S |
143 | |
144 | ||
class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
    """Extractor for a Yandex.Music album page, returned as a playlist."""

    IE_NAME = 'yandexmusic:album'
    IE_DESC = 'Яндекс.Музыка - Альбом'
    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'

    _TEST = {
        'url': 'http://music.yandex.ru/album/540508',
        'info_dict': {
            'id': '540508',
            'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
        },
        'playlist_count': 50,
        'skip': 'Travis CI servers blocked by YandexMusic',
    }

    def _real_extract(self, url):
        """Download the album JSON and return a playlist of its tracks.

        The playlist title is '<first artist> - <album title>', with
        ' (<year>)' appended when the album has a year.
        """
        album_id = self._match_id(url)

        album = self._download_json(
            'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
            album_id, 'Downloading album JSON')

        # 'volumes' is a list of track lists; include the tracks of every
        # volume, not just the first one, so multi-volume albums are complete.
        entries = self._build_playlist(
            [track for volume in album['volumes'] for track in volume])

        title = '%s - %s' % (album['artists'][0]['name'], album['title'])
        year = album.get('year')
        if year:
            title += ' (%s)' % year

        return self.playlist_result(entries, compat_str(album['id']), title)
4c603938 | 175 | |
4c603938 | 176 | |
class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
    """Extractor for a user playlist page on Yandex.Music."""

    IE_NAME = 'yandexmusic:playlist'
    IE_DESC = 'Яндекс.Музыка - Плейлист'
    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
        'info_dict': {
            'id': '1245',
            'title': 'Что слушают Enter Shikari',
            'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
        },
        'playlist_count': 6,
        'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        # playlist exceeding the limit of 150 tracks shipped with webpage (see
        # https://github.com/rg3/youtube-dl/issues/6666)
        'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
        'info_dict': {
            'id': '1036',
            'title': 'Музыка 90-х',
        },
        'playlist_count': 310,
        'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    def _real_extract(self, url):
        """Parse the playlist page and return a playlist of its tracks.

        Track data is read from the ``Mu`` JavaScript object embedded in the
        page. If it lists fewer tracks than track ids, the remaining tracks
        are requested separately (non-fatally) and appended.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        mu = self._parse_json(
            self._search_regex(
                r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
            playlist_id)

        playlist = mu['pageData']['playlist']
        tracks, track_ids = playlist['tracks'], playlist['trackIds']

        # tracks dictionary shipped with webpage is limited to 150 tracks,
        # missing tracks should be retrieved manually.
        if len(tracks) < len(track_ids):
            # Collect the ids still to fetch in playlist order, each at most
            # once, so the request is deterministic.
            seen_track_ids = set(
                compat_str(track['id']) for track in tracks if track.get('id'))
            missing_track_ids = []
            for track_id in map(compat_str, track_ids):
                if track_id not in seen_track_ids:
                    seen_track_ids.add(track_id)
                    missing_track_ids.append(track_id)

            # Nothing to request when every listed id is already present
            # (e.g. trackIds contains duplicates).
            if missing_track_ids:
                request = sanitized_Request(
                    'https://music.yandex.ru/handlers/track-entries.jsx',
                    urlencode_postdata({
                        'entries': ','.join(missing_track_ids),
                        'lang': mu.get('settings', {}).get('lang', 'en'),
                        'external-domain': 'music.yandex.ru',
                        'overembed': 'false',
                        'sign': mu.get('authData', {}).get('user', {}).get('sign'),
                        'strict': 'true',
                    }))
                request.add_header('Referer', url)
                request.add_header('X-Requested-With', 'XMLHttpRequest')

                # Best effort: a failed request leaves the playlist with the
                # tracks that shipped with the page.
                missing_tracks = self._download_json(
                    request, playlist_id, 'Downloading missing tracks JSON', fatal=False)
                if missing_tracks:
                    tracks.extend(missing_tracks)

        return self.playlist_result(
            self._build_playlist(tracks),
            compat_str(playlist_id),
            playlist['title'], playlist.get('description'))