]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from hashlib import md5 | |
5 | from base64 import b64encode | |
6 | from datetime import datetime | |
7 | import re | |
8 | ||
9 | from .common import InfoExtractor | |
10 | from ..compat import ( | |
11 | compat_urllib_parse_urlencode, | |
12 | compat_str, | |
13 | compat_itertools_count, | |
14 | ) | |
15 | from ..utils import ( | |
16 | sanitized_Request, | |
17 | float_or_none, | |
18 | ) | |
19 | ||
20 | ||
class NetEaseMusicBaseIE(InfoExtractor):
    """Shared helpers for the NetEase Cloud Music (music.163.com) extractors.

    Bundles the dfsId obfuscation used to build CDN file paths, format
    discovery for a song description, millisecond conversion and a thin
    wrapper around the JSON API.
    """

    # Keys of a song description that may each describe one audio variant.
    _FORMATS = ['bMusic', 'mMusic', 'hMusic']
    # Key that is XOR-ed with the dfsId before hashing (see _encrypt).
    _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
    _API_BASE = 'http://music.163.com/api/'

    @classmethod
    def _encrypt(cls, dfsid):
        """Return the URL-safe token derived from *dfsid*.

        The ASCII text of the id is XOR-ed byte by byte with the (cycled)
        salt, the result is MD5-hashed and the digest is base64-encoded with
        '/' and '+' replaced by '_' and '-'.
        """
        salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
        salt_len = len(salt_bytes)
        id_bytes = bytearray(compat_str(dfsid).encode('ascii'))
        mixed = bytearray(
            byte ^ salt_bytes[i % salt_len] for i, byte in enumerate(id_bytes))
        result = b64encode(md5(bytes(mixed)).digest()).decode('ascii')
        return result.replace('/', '_').replace('+', '-')

    def extract_formats(self, info):
        """Build the list of downloadable formats for the song dict *info*.

        For every variant named in _FORMATS that is present in *info*, the
        candidate hosts are probed in order and only the first URL accepted
        by _is_valid_url is kept for that variant.
        """
        formats = []
        for song_format in self._FORMATS:
            details = info.get(song_format)
            if not details:
                continue
            song_file_path = '/%s/%s.%s' % (
                self._encrypt(details['dfsId']), details['dfsId'], details['extension'])

            # 203.130.59.9, 124.40.233.182, 115.231.74.139, etc. act as a reverse
            # proxy-like feature of NetEase's CDN provider that can be used if
            # m5.music.126.net does not work, especially for users outside of
            # Mainland China.
            # via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880
            for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net',
                         'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'):
                song_url = host + song_file_path
                if self._is_valid_url(song_url, info['id'], 'song'):
                    formats.append({
                        'url': song_url,
                        'ext': details.get('extension'),
                        'abr': float_or_none(details.get('bitrate'), scale=1000),
                        'format_id': song_format,
                        'filesize': details.get('size'),
                        'asr': details.get('sr')
                    })
                    # One working host per variant is enough.
                    break
        return formats

    @classmethod
    def convert_milliseconds(cls, ms):
        """Convert *ms* milliseconds to whole seconds, rounded to nearest.

        Returns None when *ms* is None, so optional API fields fetched with
        ``.get()`` can be passed in without raising TypeError.
        """
        if ms is None:
            return None
        return int(round(ms / 1000.0))

    def query_api(self, endpoint, video_id, note):
        """Download and parse JSON from *endpoint* (relative to _API_BASE).

        The request carries a Referer header set to _API_BASE; *video_id*
        and *note* are forwarded to _download_json.
        """
        req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
        req.add_header('Referer', self._API_BASE)
        return self._download_json(req, video_id, note)
74 | ||
75 | ||
class NetEaseMusicIE(NetEaseMusicBaseIE):
    """Extractor for a single song page on music.163.com."""

    IE_NAME = 'netease:song'
    IE_DESC = '网易云音乐'
    _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/song?id=32102397',
        'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
        'info_dict': {
            'id': '32102397',
            'ext': 'mp3',
            'title': 'Bad Blood (feat. Kendrick Lamar)',
            'creator': 'Taylor Swift / Kendrick Lamar',
            'upload_date': '20150517',
            'timestamp': 1431878400,
            'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
        },
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'No lyrics translation.',
        'url': 'http://music.163.com/#/song?id=29822014',
        'info_dict': {
            'id': '29822014',
            'ext': 'mp3',
            'title': '听见下雨的声音',
            'creator': '周杰伦',
            'upload_date': '20141225',
            'timestamp': 1419523200,
            'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
        },
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'No lyrics.',
        'url': 'http://music.163.com/song?id=17241424',
        'info_dict': {
            'id': '17241424',
            'ext': 'mp3',
            'title': 'Opus 28',
            'creator': 'Dustin O\'Halloran',
            'upload_date': '20080211',
            'timestamp': 1202745600,
        },
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Has translated name.',
        'url': 'http://music.163.com/#/song?id=22735043',
        'info_dict': {
            'id': '22735043',
            'ext': 'mp3',
            'title': '소원을 말해봐 (Genie)',
            'creator': '少女时代',
            'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
            'upload_date': '20100127',
            'timestamp': 1264608000,
            'alt_title': '说出愿望吧(Genie)',
        },
        'skip': 'Blocked outside Mainland China',
    }]

    def _process_lyrics(self, lyrics_info):
        """Return the lyrics text built from the lyrics API response.

        Without a translation the original lyrics are returned unchanged
        (possibly None). Otherwise each time-stamped original line is
        emitted as ``[ts]original / translation``, with an empty translation
        when no line carries the same time stamp.
        """
        # 'lrc' / 'tlyric' may be absent or null in the response.
        original = (lyrics_info.get('lrc') or {}).get('lyric')
        translated = (lyrics_info.get('tlyric') or {}).get('lyric')

        # Nothing to merge if either side is missing; also avoids running
        # the regex over None when only a translation is present.
        if not original or not translated:
            return original

        lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
        original_ts_texts = re.findall(lyrics_expr, original)
        translation_ts_dict = dict(re.findall(lyrics_expr, translated))
        return '\n'.join([
            '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
            for time_stamp, text in original_ts_texts
        ])

    def _real_extract(self, url):
        """Fetch song details and lyrics and return the info dict."""
        song_id = self._match_id(url)

        params = {
            'id': song_id,
            'ids': '[%s]' % song_id
        }
        info = self.query_api(
            'song/detail?' + compat_urllib_parse_urlencode(params),
            song_id, 'Downloading song info')['songs'][0]

        formats = self.extract_formats(info)
        self._sort_formats(formats)

        lyrics_info = self.query_api(
            'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
            song_id, 'Downloading lyrics data')
        lyrics = self._process_lyrics(lyrics_info)

        alt_title = None
        if info.get('transNames'):
            alt_title = '/'.join(info.get('transNames'))

        # Album data and its publish time are optional; only convert the
        # time stamp when one is actually present.
        album = info.get('album') or {}
        publish_time = album.get('publishTime')
        timestamp = None
        if publish_time is not None:
            timestamp = self.convert_milliseconds(publish_time)

        return {
            'id': song_id,
            'title': info['name'],
            'alt_title': alt_title,
            'creator': ' / '.join([artist['name'] for artist in info.get('artists') or []]),
            'timestamp': timestamp,
            'thumbnail': album.get('picUrl'),
            'duration': self.convert_milliseconds(info.get('duration') or 0),
            'description': lyrics,
            'formats': formats,
        }
186 | ||
187 | ||
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
    """Extractor that turns an album page into a playlist of its songs."""

    IE_NAME = 'netease:album'
    IE_DESC = '网易云音乐 - 专辑'
    _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/album?id=220780',
        'info_dict': {
            'id': '220780',
            'title': 'B\'day',
        },
        'playlist_count': 23,
        'skip': 'Blocked outside Mainland China',
    }

    def _real_extract(self, url):
        """Query the album API and return a playlist of song URL results."""
        album_id = self._match_id(url)

        endpoint = 'album/%s?id=%s' % (album_id, album_id)
        album = self.query_api(endpoint, album_id, 'Downloading album data')['album']

        title = album['name']
        description = album.get('description')

        # Each track is delegated to the single-song extractor.
        entries = []
        for track in album['songs']:
            track_id = track['id']
            entries.append(self.url_result(
                'http://music.163.com/#/song?id=%s' % track_id,
                'NetEaseMusic', track_id))
        return self.playlist_result(entries, album_id, title, description)
217 | ||
218 | ||
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
    """Extractor that turns an artist page into a playlist of hot songs."""

    IE_NAME = 'netease:singer'
    IE_DESC = '网易云音乐 - 歌手'
    _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'note': 'Singer has aliases.',
        'url': 'http://music.163.com/#/artist?id=10559',
        'info_dict': {
            'id': '10559',
            'title': '张惠妹 - aMEI;阿密特',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Singer has translated name.',
        'url': 'http://music.163.com/#/artist?id=124098',
        'info_dict': {
            'id': '124098',
            'title': '李昇基 - 이승기',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Query the artist API and return a playlist of the hot songs.

        The playlist title is the artist name, followed by the translated
        name and the ';'-joined aliases when those are non-empty.
        """
        singer_id = self._match_id(url)

        endpoint = 'artist/%s?id=%s' % (singer_id, singer_id)
        info = self.query_api(endpoint, singer_id, 'Downloading singer data')

        title = info['artist']['name']
        translated = info['artist']['trans']
        if translated:
            title = '%s - %s' % (title, translated)
        aliases = info['artist']['alias']
        if aliases:
            title = '%s - %s' % (title, ';'.join(aliases))

        entries = []
        for track in info['hotSongs']:
            track_id = track['id']
            entries.append(self.url_result(
                'http://music.163.com/#/song?id=%s' % track_id,
                'NetEaseMusic', track_id))
        return self.playlist_result(entries, singer_id, title)
262 | ||
263 | ||
class NetEaseMusicListIE(NetEaseMusicBaseIE):
    """Extractor for user playlists and toplists (charts)."""

    IE_NAME = 'netease:playlist'
    IE_DESC = '网易云音乐 - 歌单'
    _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/playlist?id=79177352',
        'info_dict': {
            'id': '79177352',
            'title': 'Billboard 2007 Top 100',
            'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
        },
        'playlist_count': 99,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Toplist/Charts sample',
        'url': 'http://music.163.com/#/discover/toplist?id=3733003',
        'info_dict': {
            'id': '3733003',
            'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
            'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Query the playlist API and return a playlist of its tracks.

        When the response has specialType 10 (chart/toplist) the update
        date, formatted as YYYY-MM-DD, is appended to the title.
        """
        list_id = self._match_id(url)

        endpoint = 'playlist/detail?id=%s&lv=-1&tv=-1' % list_id
        playlist = self.query_api(endpoint, list_id, 'Downloading playlist data')['result']

        title = playlist['name']
        description = playlist.get('description')

        is_chart = playlist.get('specialType') == 10
        if is_chart:
            updated_at = datetime.fromtimestamp(
                self.convert_milliseconds(playlist['updateTime']))
            title = '%s %s' % (title, updated_at.strftime('%Y-%m-%d'))

        entries = []
        for track in playlist['tracks']:
            track_id = track['id']
            entries.append(self.url_result(
                'http://music.163.com/#/song?id=%s' % track_id,
                'NetEaseMusic', track_id))
        return self.playlist_result(entries, list_id, title, description)
310 | ||
311 | ||
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
    """Extractor for a music video (MV) page."""

    IE_NAME = 'netease:mv'
    IE_DESC = '网易云音乐 - MV'
    _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/mv?id=415350',
        'info_dict': {
            'id': '415350',
            'ext': 'mp4',
            'title': '이럴거면 그러지말지',
            'description': '白雅言自作曲唱甜蜜爱情',
            'creator': '白雅言',
            'upload_date': '20150520',
        },
        'skip': 'Blocked outside Mainland China',
    }

    def _real_extract(self, url):
        """Query the MV API and return the info dict with one format per 'brs' entry."""
        mv_id = self._match_id(url)

        endpoint = 'mv/detail?id=%s&type=mp4' % mv_id
        info = self.query_api(endpoint, mv_id, 'Downloading mv info')['data']

        # 'brs' maps a key (used as the height) to the matching stream URL.
        formats = []
        for brs, mv_url in info['brs'].items():
            formats.append({
                'url': mv_url,
                'ext': 'mp4',
                'format_id': '%sp' % brs,
                'height': int(brs),
            })
        self._sort_formats(formats)

        return {
            'id': mv_id,
            'title': info['name'],
            'description': info.get('desc') or info.get('briefDesc'),
            'creator': info['artistName'],
            'upload_date': info['publishTime'].replace('-', ''),
            'formats': formats,
            'thumbnail': info.get('cover'),
            'duration': self.convert_milliseconds(info.get('duration', 0)),
        }
352 | ||
353 | ||
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
    """Extractor for a radio program, optionally with accompanying songs."""

    IE_NAME = 'netease:program'
    IE_DESC = '网易云音乐 - 电台节目'
    # The '#/' fragment prefix is optional, as in the other NetEase
    # extractors; a bare '#' is still accepted for backward compatibility.
    _VALID_URL = r'https?://music\.163\.com/(#/?)?program\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/program?id=10109055',
        'info_dict': {
            'id': '10109055',
            'ext': 'mp3',
            'title': '不丹足球背后的故事',
            'description': '喜马拉雅人的足球梦 ...',
            'creator': '大话西藏',
            'timestamp': 1434179342,
            'upload_date': '20150613',
            'duration': 900,
        },
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
        },
        'playlist_count': 4,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'ext': 'mp3',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
            'timestamp': 1434450841,
            'upload_date': '20150616',
        },
        'params': {
            'noplaylist': True
        },
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Return the program as a single audio or as a playlist.

        A single info dict for the main audio is returned when the program
        has no accompanying songs or the 'noplaylist' option is set;
        otherwise a playlist of the main song followed by the accompanying
        songs is returned.
        """
        program_id = self._match_id(url)

        info = self.query_api(
            'dj/program/detail?id=%s' % program_id,
            program_id, 'Downloading program info')['program']

        name = info['name']
        description = info['description']

        if not info['songs'] or self._downloader.params.get('noplaylist'):
            if info['songs']:
                # Songs exist, so we only got here because of --no-playlist.
                self.to_screen(
                    'Downloading just the main audio %s because of --no-playlist'
                    % info['mainSong']['id'])

            formats = self.extract_formats(info['mainSong'])
            self._sort_formats(formats)

            return {
                'id': program_id,
                'title': name,
                'description': description,
                'creator': info['dj']['brand'],
                'timestamp': self.convert_milliseconds(info['createTime']),
                'thumbnail': info['coverUrl'],
                'duration': self.convert_milliseconds(info.get('duration', 0)),
                'formats': formats,
            }

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download the main audio %s'
            % (program_id, info['mainSong']['id']))

        # Main audio first, then every accompanying song.
        song_ids = [info['mainSong']['id']]
        song_ids.extend([song['id'] for song in info['songs']])
        entries = [
            self.url_result('http://music.163.com/#/song?id=%s' % song_id,
                            'NetEaseMusic', song_id)
            for song_id in song_ids
        ]
        return self.playlist_result(entries, program_id, name, description)
440 | ||
441 | ||
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
    """Extractor that turns a DJ radio page into a playlist of programs."""

    IE_NAME = 'netease:djradio'
    IE_DESC = '网易云音乐 - 电台'
    _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/djradio?id=42',
        'info_dict': {
            'id': '42',
            'title': '声音蔓延',
            'description': 'md5:766220985cbd16fdd552f64c578a6b15'
        },
        'playlist_mincount': 40,
        'skip': 'Blocked outside Mainland China',
    }
    # Number of programs requested per API page.
    _PAGE_SIZE = 1000

    def _real_extract(self, url):
        """Page through the radio's programs and return them as a playlist.

        Pages of _PAGE_SIZE programs are requested until the API reports no
        more results. The playlist title and description come from the
        'radio' object of the first program seen; they stay None when the
        radio has no programs at all.
        """
        dj_id = self._match_id(url)

        name = None
        desc = None
        entries = []
        for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
            info = self.query_api(
                'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
                % (self._PAGE_SIZE, dj_id, offset),
                dj_id, 'Downloading dj programs - %d' % offset)

            programs = info['programs']
            entries.extend([
                self.url_result(
                    'http://music.163.com/#/program?id=%s' % program['id'],
                    'NetEaseMusicProgram', program['id'])
                for program in programs
            ])

            # Guard against an empty page: indexing programs[0] would raise
            # IndexError for a radio without any programs.
            if name is None and programs:
                radio = programs[0]['radio']
                name = radio['name']
                desc = radio['desc']

            if not info['more']:
                break

        return self.playlist_result(entries, dj_id, name, desc)