]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/xiami.py
[xiami] Add xiami extractor
[yt-dlp.git] / youtube_dl / extractor / xiami.py
CommitLineData
89c0dc9a
B
1# -*- coding: utf-8 -*-
2
3from __future__ import unicode_literals
4
5from .common import InfoExtractor
6from ..utils import (
7 xpath_element,
8 xpath_text,
9 xpath_with_ns,
10 int_or_none,
11 ExtractorError
12)
13from ..compat import compat_urllib_parse_unquote
14
15
class XiamiBaseIE(InfoExtractor):
    """Common base for the Xiami extractors.

    Downloads the playlist XML for an id, converts each track element into
    an info dict and decodes the obfuscated media location.
    """

    _XML_BASE_URL = 'http://www.xiami.com/song/playlist/id'
    # Prefix -> namespace URI used by every XPath lookup below.
    _NS_MAP = {'xm': 'http://xspf.org/ns/0/'}

    def _track_text(self, track, tag, **kwargs):
        """Return the text of the namespaced ``xm:<tag>`` child of *track*.

        Extra keyword arguments (e.g. ``default``) are forwarded to
        ``xpath_text``.
        """
        return xpath_text(
            track, xpath_with_ns('xm:%s' % tag, self._NS_MAP), **kwargs)

    def _extract_track(self, track):
        """Build the info dict for a single track element.

        If the lyric URL ends in ``.lrc`` it is downloaded and stored as the
        ``description``.
        """
        artist = self._track_text(track, 'artist', default='')
        # ''.split(';') yields [''] (truthy), so decide on the raw text;
        # otherwise a missing artist would be reported as '' instead of None.
        artists = artist.split(';') if artist else []

        ret = {
            'id': self._track_text(track, 'song_id'),
            'title': self._track_text(track, 'title'),
            'album': self._track_text(track, 'album_name'),
            'artist': ';'.join(artists) if artists else None,
            'creator': artists[0] if artists else None,
            'url': self._decrypt(self._track_text(track, 'location')),
            'thumbnail': self._track_text(track, 'pic', default=None),
            'duration': int_or_none(self._track_text(track, 'length')),
        }

        lyrics_url = self._track_text(track, 'lyric')
        if lyrics_url and lyrics_url.endswith('.lrc'):
            ret['description'] = self._download_webpage(lyrics_url, ret['id'])
        return ret

    def _extract_xml(self, _id, typ=''):
        """Download the playlist XML for *_id* and return its tracks.

        *typ* is appended verbatim to the request URL (callers pass
        e.g. ``'/type/1'``).  Raises ExtractorError when the document has no
        track list or the track list is empty.
        """
        playlist = self._download_xml(
            '%s/%s%s' % (self._XML_BASE_URL, _id, typ), _id)
        tracklist = xpath_element(
            playlist, xpath_with_ns('./xm:trackList', self._NS_MAP))

        # xpath_element yields None when the element is missing; check that
        # explicitly so the error below is raised instead of a TypeError.
        if tracklist is None or not len(tracklist):
            raise ExtractorError('No track found')
        return [self._extract_track(track) for track in tracklist]

    @staticmethod
    def _decrypt(origin):
        """Decode an obfuscated location string into a URL.

        The first character is the row count ``n``.  The remainder is cut
        into ``n`` consecutive rows whose lengths differ by at most one (the
        longer rows come first); reading those rows column by column gives a
        percent-encoded string, which is unquoted and has every ``'^'``
        replaced by ``'0'``.
        """
        n = int(origin[0])
        origin = origin[1:]
        short_length = len(origin) // n
        # Number of leading rows that carry one extra character.
        long_num = len(origin) - short_length * n

        rows = []
        start = 0
        for i in range(n):
            length = short_length + 1 if i < long_num else short_length
            rows.append(origin[start:start + length])
            start += length

        chars = []
        for i in range(short_length + 1):
            for row in rows:
                if len(row) > i:
                    chars.append(row[i])
        return compat_urllib_parse_unquote(''.join(chars)).replace('^', '0')
68
69
class XiamiIE(XiamiBaseIE):
    """Extractor for a single Xiami song page."""

    IE_NAME = 'xiami:song'
    IE_DESC = '虾米音乐'
    # Only the numeric id is taken from the URL, so both http and https
    # links are accepted.
    _VALID_URL = r'https?://www\.xiami\.com/song/(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://www.xiami.com/song/1775610518',
            'md5': '521dd6bea40fd5c9c69f913c232cb57e',
            'info_dict': {
                'id': '1775610518',
                'ext': 'mp3',
                'title': 'Woman',
                'creator': 'HONNE',
                'album': 'Woman',
                'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
                'description': 'md5:052ec7de41ca19f67e7fd70a1bfc4e0b',
            }
        },
        {
            'url': 'http://www.xiami.com/song/1775256504',
            'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
            'info_dict': {
                'id': '1775256504',
                'ext': 'mp3',
                'title': '悟空',
                'creator': '戴荃',
                'album': '悟空',
                'description': 'md5:206e67e84f9bed1d473d04196a00b990',
            }
        },
    ]

    def _real_extract(self, url):
        """Return the info dict of the first track in the song's playlist XML."""
        _id = self._match_id(url)
        return self._extract_xml(_id)[0]
105
106
class XiamiAlbumIE(XiamiBaseIE):
    """Extractor for a Xiami album page; yields a playlist of its tracks."""

    IE_NAME = 'xiami:album'
    IE_DESC = '虾米音乐 - 专辑'
    # Only the numeric id is taken from the URL, so both http and https
    # links are accepted.
    _VALID_URL = r'https?://www\.xiami\.com/album/(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://www.xiami.com/album/2100300444',
            'info_dict': {
                'id': '2100300444',
            },
            'playlist_count': 10,
        },
        {
            'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        """Return a playlist result built from the '/type/1' playlist XML."""
        _id = self._match_id(url)
        return self.playlist_result(self._extract_xml(_id, '/type/1'), _id)
128
129
class XiamiArtistIE(XiamiBaseIE):
    """Extractor for a Xiami artist page; yields a playlist of tracks."""

    IE_NAME = 'xiami:artist'
    IE_DESC = '虾米音乐 - 歌手'
    # Only the numeric id is taken from the URL, so both http and https
    # links are accepted.
    _VALID_URL = r'https?://www\.xiami\.com/artist/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
        'info_dict': {
            'id': '2132',
        },
        'playlist_count': 20,
    }

    def _real_extract(self, url):
        """Return a playlist result built from the '/type/2' playlist XML."""
        _id = self._match_id(url)
        return self.playlist_result(self._extract_xml(_id, '/type/2'), _id)
145
146
class XiamiCollectionIE(XiamiBaseIE):
    """Extractor for a Xiami collection page; yields a playlist of tracks."""

    IE_NAME = 'xiami:collection'
    IE_DESC = '虾米音乐 - 精选集'
    # Only the numeric id is taken from the URL, so both http and https
    # links are accepted.
    _VALID_URL = r'https?://www\.xiami\.com/collect/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
        'info_dict': {
            'id': '156527391',
        },
        'playlist_count': 26,
    }

    def _real_extract(self, url):
        """Return a playlist result built from the '/type/3' playlist XML."""
        _id = self._match_id(url)
        return self.playlist_result(self._extract_xml(_id, '/type/3'), _id)