]>
Commit | Line | Data |
---|---|---|
e7c14660 | 1 | # coding: utf-8 |
4c603938 MA |
2 | from __future__ import unicode_literals |
3 | ||
4 | import re | |
5 | import hashlib | |
4c603938 MA |
6 | |
7 | from .common import InfoExtractor | |
6e6bc8da | 8 | from ..compat import compat_str |
47fe42e1 | 9 | from ..utils import ( |
ae7d31af | 10 | ExtractorError, |
47fe42e1 S |
11 | int_or_none, |
12 | float_or_none, | |
0250161c | 13 | try_get, |
47fe42e1 | 14 | ) |
4c603938 | 15 | |
4c603938 | 16 | |
ae7d31af S |
class YandexMusicBaseIE(InfoExtractor):
    """Shared base for the Yandex.Music extractors.

    Wraps the page and JSON download helpers so that errors reported by the
    service and CAPTCHA challenges surface as ``ExtractorError`` raised with
    ``expected=True``.
    """

    @staticmethod
    def _handle_error(response):
        """Raise ``ExtractorError`` when a JSON *response* reports a failure.

        Only dict responses are inspected; any other value is ignored.  A
        truthy ``error`` entry is raised as the error message.  A response
        whose ``type`` is ``'captcha'`` or that contains a ``captcha`` key
        raises the CAPTCHA error instead.
        """
        if not isinstance(response, dict):
            return
        error = response.get('error')
        if error:
            raise ExtractorError(error, expected=True)
        if response.get('type') == 'captcha' or 'captcha' in response:
            YandexMusicBaseIE._raise_captcha()

    @staticmethod
    def _raise_captcha():
        """Raise the ``ExtractorError`` explaining the CAPTCHA block."""
        raise ExtractorError(
            'YandexMusic has considered youtube-dlc requests automated and '
            'asks you to solve a CAPTCHA. You can either wait for some '
            'time until unblocked and optionally use --sleep-interval '
            'in future or alternatively you can go to https://music.yandex.ru/ '
            'solve CAPTCHA, then export cookies and pass cookie file to '
            'youtube-dlc with --cookies',
            expected=True)

    def _download_webpage_handle(self, *args, **kwargs):
        """Download a page and raise if it is the anti-bot CAPTCHA page.

        All arguments are forwarded to the parent implementation and its
        result is returned unchanged.
        """
        result = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
        # The parent helper returns a (content, url_handle) pair, or a falsy
        # value when a non-fatal download fails.  Applying ``in`` to the pair
        # itself would compare the marker against whole elements and never
        # match, so the page text is pulled out before the check.
        webpage = result[0] if isinstance(result, tuple) else result
        # Marker text (Russian): "We are very sorry, but the requests coming
        # from your IP address look automated."
        if webpage and 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage:
            self._raise_captcha()
        return result

    def _download_json(self, *args, **kwargs):
        """Download JSON via the parent helper and check it for errors.

        Returns the parent's result unchanged after passing it through
        ``_handle_error``.
        """
        response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
        self._handle_error(response)
        return response
48 | ||
49 | ||
class YandexMusicTrackIE(YandexMusicBaseIE):
    """Extractor for a single track at ``/album/<album_id>/track/<id>``."""

    IE_NAME = 'yandexmusic:track'
    IE_DESC = 'Яндекс.Музыка - Трек'
    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://music.yandex.ru/album/540508/track/4878838',
        'md5': 'f496818aa2f60b6c0062980d2e00dc20',
        'info_dict': {
            'id': '4878838',
            'ext': 'mp3',
            'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',
            'filesize': 4628061,
            'duration': 193.04,
            'track': 'Gypsy Eyes 1',
            'album': 'Gypsy Soul',
            'album_artist': 'Carlo Ambrosio',
            'artist': 'Carlo Ambrosio & Fabio Di Bari',
            'release_year': 2009,
        },
        'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        # multiple disks
        'url': 'http://music.yandex.ru/album/3840501/track/705105',
        'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e',
        'info_dict': {
            'id': '705105',
            'ext': 'mp3',
            'title': 'Hooverphonic - Sometimes',
            'filesize': 5743386,
            'duration': 239.27,
            'track': 'Sometimes',
            'album': 'The Best of Hooverphonic',
            'album_artist': 'Hooverphonic',
            'artist': 'Hooverphonic',
            'release_year': 2016,
            'genre': 'pop',
            'disc_number': 2,
            'track_number': 9,
        },
        'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    def _real_extract(self, url):
        """Build the info dict for the track addressed by *url*.

        Performs three JSON requests (track metadata, download descriptor,
        storage location) and derives the final MP3 URL from them.  Album
        related fields are filled in only when the track metadata carries a
        usable first album entry.
        """
        mobj = re.match(self._VALID_URL, url)
        album_id, track_id = mobj.group('album_id'), mobj.group('id')

        track = self._download_json(
            'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
            track_id, 'Downloading track JSON')['track']
        track_title = track['title']

        download_data = self._download_json(
            'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
            track_id, 'Downloading track location url JSON',
            headers={'X-Retpath-Y': url})

        fd_data = self._download_json(
            download_data['src'], track_id,
            'Downloading track location JSON',
            query={'format': 'json'})
        # The download key is the MD5 of a fixed salt, the storage path
        # without its first character, and the 's' value of the response.
        key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
        storage = track['storageDir'].split('.')
        # No trailing whitespace: the previous format string ended in a stray
        # space that became part of the track-id query value.
        f_url = 'http://%s/get-mp3/%s/%s?track-id=%s' % (
            fd_data['host'], key, fd_data['ts'] + fd_data['path'], storage[1])

        # Resolve the first album once.  'albums' may be missing, empty or of
        # an unexpected type, and must not crash thumbnail extraction.
        albums = track.get('albums')
        album = albums[0] if albums and isinstance(albums, list) else None
        if not isinstance(album, dict):
            album = None

        thumbnail = None
        cover_uri = album.get('coverUri') if album else None
        if cover_uri:
            # '%%' is the size placeholder inside the cover URI.
            thumbnail = cover_uri.replace('%%', 'orig')
            if not thumbnail.startswith('http'):
                thumbnail = 'http://' + thumbnail

        track_info = {
            'id': track_id,
            'ext': 'mp3',
            'url': f_url,
            'filesize': int_or_none(track.get('fileSize')),
            'duration': float_or_none(track.get('durationMs'), 1000),
            'thumbnail': thumbnail,
            'track': track_title,
            'acodec': download_data.get('codec'),
            'abr': int_or_none(download_data.get('bitrate')),
        }

        def extract_artist_name(artist):
            # 'decomposed' interleaves separator strings with further artist
            # dicts; concatenate them after the primary name.
            decomposed = artist.get('decomposed')
            if not isinstance(decomposed, list):
                return artist['name']
            parts = [artist['name']]
            for element in decomposed:
                if isinstance(element, dict) and element.get('name'):
                    parts.append(element['name'])
                elif isinstance(element, compat_str):
                    parts.append(element)
            return ''.join(parts)

        def extract_artist(artist_list):
            # Returns a comma separated string of artist names, or None when
            # the list is missing, malformed or yields no named artist.
            if artist_list and isinstance(artist_list, list):
                artists_names = [
                    extract_artist_name(a) for a in artist_list
                    if isinstance(a, dict) and a.get('name')]
                if artists_names:
                    return ', '.join(artists_names)

        if album is not None:
            year = album.get('year')
            disc_number = int_or_none(try_get(
                album, lambda x: x['trackPosition']['volume']))
            track_number = int_or_none(try_get(
                album, lambda x: x['trackPosition']['index']))
            track_info.update({
                'album': album.get('title'),
                'album_artist': extract_artist(album.get('artists')),
                'release_year': int_or_none(year),
                'genre': album.get('genre'),
                'disc_number': disc_number,
                'track_number': track_number,
            })

        track_artist = extract_artist(track.get('artists'))
        if track_artist:
            track_info.update({
                'artist': track_artist,
                'title': '%s - %s' % (track_artist, track_title),
            })
        else:
            track_info['title'] = track_title

        return track_info
4c603938 | 180 | |
4c603938 | 181 | |
class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
    """Base for extractors that return a list of tracks."""

    def _build_playlist(self, tracks):
        """Turn track dicts into URL results pointing at their track pages.

        Tracks whose ``albums`` value is empty or not a list are left out,
        because the track page URL is built from the first album's id.
        """
        entries = []
        for track in tracks:
            albums = track.get('albums')
            if not albums or not isinstance(albums, list):
                continue
            track_url = 'http://music.yandex.ru/album/%s/track/%s' % (
                albums[0]['id'], track['id'])
            entries.append(self.url_result(track_url))
        return entries
e7c14660 S |
188 | |
189 | ||
class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
    """Extractor for a whole album, returned as a playlist of its tracks."""

    IE_NAME = 'yandexmusic:album'
    IE_DESC = 'Яндекс.Музыка - Альбом'
    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'

    _TESTS = [{
        'url': 'http://music.yandex.ru/album/540508',
        'info_dict': {
            'id': '540508',
            'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
        },
        'playlist_count': 50,
        'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        'url': 'https://music.yandex.ru/album/3840501',
        'info_dict': {
            'id': '3840501',
            'title': 'Hooverphonic - The Best of Hooverphonic (2016)',
        },
        'playlist_count': 33,
        'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    def _real_extract(self, url):
        """Return a playlist result holding every track of the album.

        The playlist title is ``<artist> - <album title>``, followed by
        `` (<year>)`` when the album reports a year.  When no first artist
        name is available the title starts with the album title alone.
        """
        album_id = self._match_id(url)

        album = self._download_json(
            'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
            album_id, 'Downloading album JSON')

        # 'volumes' is a list of discs, each a list of tracks; flatten it.
        entries = self._build_playlist([track for volume in album['volumes'] for track in volume])

        title = album['title']
        # An album without a usable artist list previously crashed on
        # album['artists'][0]; fall back to the bare album title instead.
        artist = try_get(album, lambda x: x['artists'][0]['name'])
        if artist:
            title = '%s - %s' % (artist, title)
        year = album.get('year')
        if year:
            title += ' (%s)' % year

        return self.playlist_result(entries, compat_str(album['id']), title)
4c603938 | 228 | |
4c603938 | 229 | |
class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
    """Extractor for a user playlist at ``/users/<user>/playlists/<id>``."""

    IE_NAME = 'yandexmusic:playlist'
    IE_DESC = 'Яндекс.Музыка - Плейлист'
    _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
        'info_dict': {
            'id': '1245',
            'title': 'Что слушают Enter Shikari',
            'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
        },
        'playlist_count': 6,
        'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        # playlist exceeding the limit of 150 tracks shipped with webpage (see
        # https://github.com/ytdl-org/youtube-dl/issues/6666)
        'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
        'info_dict': {
            'id': '1036',
            'title': 'Музыка 90-х',
        },
        'playlist_mincount': 300,
        'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    def _real_extract(self, url):
        """Return a playlist result for the playlist addressed by *url*.

        The playlist JSON is mandatory.  Tracks listed in ``trackIds`` but
        absent from ``tracks`` are fetched with a second, best-effort request
        and appended when that request succeeds.
        """
        mobj = re.match(self._VALID_URL, url)
        tld = mobj.group('tld')
        user = mobj.group('user')
        playlist_id = mobj.group('id')

        # This request is fatal on purpose: its result is subscripted right
        # away, so a non-fatal failure would only surface as a TypeError.
        playlist = self._download_json(
            'https://music.yandex.%s/handlers/playlist.jsx' % tld,
            playlist_id, 'Downloading playlist JSON',
            headers={
                'Referer': url,
                'X-Requested-With': 'XMLHttpRequest',
                'X-Retpath-Y': url,
            },
            query={
                'owner': user,
                'kinds': playlist_id,
                'light': 'true',
                'lang': tld,
                'external-domain': 'music.yandex.%s' % tld,
                'overembed': 'false',
            })['playlist']

        tracks = playlist['tracks']
        track_ids = [compat_str(track_id) for track_id in playlist['trackIds']]

        # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
        # missing tracks should be retrieved manually.
        if len(tracks) < len(track_ids):
            present_track_ids = set(
                compat_str(track['id'])
                for track in tracks if track.get('id'))
            missing_track_ids = [
                track_id for track_id in track_ids
                if track_id not in present_track_ids]
            # Skip the request when there is nothing to ask for.
            if missing_track_ids:
                missing_tracks = self._download_json(
                    'https://music.yandex.%s/handlers/track-entries.jsx' % tld,
                    playlist_id, 'Downloading missing tracks JSON',
                    fatal=False,
                    headers={
                        'Referer': url,
                        'X-Requested-With': 'XMLHttpRequest',
                    },
                    query={
                        'entries': ','.join(missing_track_ids),
                        'lang': tld,
                        'external-domain': 'music.yandex.%s' % tld,
                        'overembed': 'false',
                        'strict': 'true',
                    })
                # Best effort: anything other than a non-empty list is dropped
                # and the playlist is returned with the tracks already known.
                if missing_tracks and isinstance(missing_tracks, list):
                    tracks.extend(missing_tracks)

        return self.playlist_result(
            self._build_playlist(tracks),
            compat_str(playlist_id),
            playlist.get('title'), playlist.get('description'))