jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/yandexmusic.py
[viewster] Improve extraction
[yt-dlp.git] / youtube_dl / extractor / yandexmusic.py
CommitLineData
4c603938
MA
1# coding=utf-8
2from __future__ import unicode_literals
3
4import re
5import hashlib
4c603938
MA
6
7from .common import InfoExtractor
47fe42e1
S
8from ..compat import compat_str
9from ..utils import (
10 int_or_none,
11 float_or_none,
12)
4c603938 13
4c603938 14
class YandexMusicBaseIE(InfoExtractor):
    """Helpers shared by the Yandex.Music extractors defined in this file."""

    def _get_track_url(self, storage_dir, track_id):
        """Build the direct MP3 URL for a single track.

        Downloads the ``download-info`` JSON for ``storage_dir`` and combines
        its ``host``, ``ts``, ``path`` and ``s`` fields into the final URL.

        storage_dir -- the track's ``storageDir`` value; its second
                       dot-separated component is sent as the ``track-id``
                       query parameter, so it must contain at least one dot.
        track_id    -- forwarded to ``_download_json`` as the video id.
        """
        data = self._download_json(
            'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
            % storage_dir,
            track_id, 'Downloading track location JSON')

        # The URL key is the MD5 hex digest of a fixed prefix, the path with
        # its first character dropped, and the ``s`` value from the response.
        key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']).encode('utf-8')).hexdigest()
        storage = storage_dir.split('.')

        return ('http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default'
                % (data['host'], key, data['ts'] + data['path'], storage[1]))

    def _get_track_info(self, track):
        """Convert a track JSON object into an info dict.

        The title is ``'<first artist> - <track title>'``.  When the track has
        no usable artist entry (``artists`` missing, empty, or its first item
        has no ``name``) the bare track title is used instead of raising
        ``KeyError``/``IndexError``.
        """
        title = track['title']
        artists = track.get('artists')
        if artists:
            artist_name = artists[0].get('name')
            if artist_name:
                title = '%s - %s' % (artist_name, title)

        return {
            # Coerced to text in case the JSON delivers a numeric id; the
            # album extractor in this file does the same for its playlist id.
            'id': compat_str(track['id']),
            'ext': 'mp3',
            'url': self._get_track_url(track['storageDir'], track['id']),
            'title': title,
            'filesize': int_or_none(track.get('fileSize')),
            # durationMs is scaled by 1000 (presumably milliseconds -> seconds).
            'duration': float_or_none(track.get('durationMs'), 1000),
        }
4c603938 37
47fe42e1
S
38
class YandexMusicTrackIE(YandexMusicBaseIE):
    """Extractor for a single track addressed as /album/<album>/track/<id>."""

    IE_NAME = 'yandexmusic:track'
    IE_DESC = 'Яндекс.Музыка - Трек'
    _VALID_URL = r'https?://music\.yandex\.ru/album/(?P<album_id>\d+)/track/(?P<id>\d+)'

    _TEST = {
        'url': 'http://music.yandex.ru/album/540508/track/4878838',
        'md5': 'f496818aa2f60b6c0062980d2e00dc20',
        'info_dict': {
            'id': '4878838',
            'ext': 'mp3',
            'title': 'Carlo Ambrosio - Gypsy Eyes 1',
            'filesize': 4628061,
            'duration': 193.04,
        }
    }

    def _real_extract(self, url):
        """Fetch the track JSON for ``url`` and return its info dict."""
        # Both ids come from the URL; the handler expects "<track>:<album>".
        album_id, track_id = re.match(self._VALID_URL, url).group('album_id', 'id')

        track_json_url = (
            'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id))
        response = self._download_json(
            track_json_url, track_id, 'Downloading track JSON')

        return self._get_track_info(response['track'])
4c603938 65
4c603938 66
47fe42e1
S
class YandexMusicAlbumIE(YandexMusicBaseIE):
    """Extractor that turns an album page into a playlist of its tracks."""

    IE_NAME = 'yandexmusic:album'
    IE_DESC = 'Яндекс.Музыка - Альбом'
    _VALID_URL = r'https?://music\.yandex\.ru/album/(?P<id>\d+)/?(\?|$)'

    _TEST = {
        'url': 'http://music.yandex.ru/album/540508',
        'info_dict': {
            'id': '540508',
            'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
        },
        'playlist_count': 50,
    }

    def _real_extract(self, url):
        """Download the album JSON and return a playlist of all its tracks.

        The playlist title is ``'<first artist> - <album title>'`` with
        ``' (<year>)'`` appended when the album JSON carries a year.
        """
        album_id = self._match_id(url)

        album = self._download_json(
            'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
            album_id, 'Downloading album JSON')

        # ``volumes`` is a list of track lists.  Walk every volume rather than
        # only the first one, so tracks in later volumes are not dropped and
        # an empty ``volumes`` list yields an empty playlist instead of an
        # IndexError.
        entries = [
            self._get_track_info(track)
            for volume in album['volumes']
            for track in volume]

        title = '%s - %s' % (album['artists'][0]['name'], album['title'])
        year = album.get('year')
        if year:
            title += ' (%s)' % year

        return self.playlist_result(entries, compat_str(album['id']), title)
4c603938 96
4c603938 97
47fe42e1
S
class YandexMusicPlaylistIE(YandexMusicBaseIE):
    """Extractor for user playlists at /users/<user>/playlists/<id>."""

    IE_NAME = 'yandexmusic:playlist'
    IE_DESC = 'Яндекс.Музыка - Плейлист'
    _VALID_URL = r'https?://music\.yandex\.ru/users/[^/]+/playlists/(?P<id>\d+)'

    _TEST = {
        'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
        'info_dict': {
            'id': '1245',
            'title': 'Что слушают Enter Shikari',
            'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
        },
        'playlist_count': 6,
    }

    def _real_extract(self, url):
        """Read the playlist embedded in the page and return it as a playlist result."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # The page assigns a JSON object to a script variable named ``Mu``;
        # the playlist sits under pageData -> playlist.
        mu_json = self._search_regex(
            r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player')
        page = self._parse_json(mu_json, playlist_id)
        playlist = page['pageData']['playlist']

        entries = []
        for track in playlist['tracks']:
            entries.append(self._get_track_info(track))

        return self.playlist_result(
            entries, compat_str(playlist_id),
            playlist['title'], playlist.get('description'))