]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/neteasemusic.py
[extractor] Deprecate `_sort_formats`
[yt-dlp.git] / yt_dlp / extractor / neteasemusic.py
CommitLineData
46d09f87 1import itertools
db4678e4 2import json
ac668111 3import re
db4678e4 4import time
02b386f8 5from base64 import b64encode
db4678e4 6from binascii import hexlify
af1fa623 7from datetime import datetime
ac668111 8from hashlib import md5
db4678e4 9from random import randint
af1fa623 10
11from .common import InfoExtractor
db4678e4 12from ..aes import aes_ecb_encrypt, pkcs7_padding
13from ..compat import compat_urllib_parse_urlencode
14from ..utils import (
15 ExtractorError,
16 bytes_to_intlist,
17 error_to_compat_str,
18 float_or_none,
19 int_or_none,
20 intlist_to_bytes,
21 sanitized_Request,
22 try_get,
23)
af1fa623 24
25
class NetEaseMusicBaseIE(InfoExtractor):
    """Shared request/format helpers for the music.163.com extractors."""

    # Keys of a song info dict that hold per-format details
    # (each value is read for 'bitrate', 'extension' and 'sr').
    _FORMATS = ['bMusic', 'mMusic', 'hMusic']
    _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
    _API_BASE = 'http://music.163.com/api/'

    @classmethod
    def _encrypt(cls, dfsid):
        """Return the URL-safe base64 MD5 digest of *dfsid* XORed with the salt.

        *dfsid* is stringified and ASCII-encoded; each byte is XORed with the
        salt byte at the same position (the salt repeats cyclically).
        """
        salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
        salt_len = len(salt_bytes)
        mixed = bytes(bytearray(
            byte ^ salt_bytes[i % salt_len]
            for i, byte in enumerate(bytearray(str(dfsid).encode('ascii')))))
        result = b64encode(md5(mixed).digest()).decode('ascii')
        return result.replace('/', '_').replace('+', '-')

    def make_player_api_request_data_and_headers(self, song_id, bitrate):
        """Build the encrypted POST body and headers for the player URL API.

        Returns a ``(data, headers)`` tuple where *data* is the string
        ``'params=<HEX>'`` (upper-case hex of the AES-ECB encrypted payload)
        and *headers* is a dict of HTTP request headers.
        """
        KEY = b'e82ckenh8dichen8'
        URL = '/api/song/enhance/player/url'
        now = int(time.time() * 1000)
        rand = randint(0, 1000)
        # None values become JSON null in the request body and the literal
        # text 'undefined' in the Cookie header built below.
        cookie = {
            'osver': None,
            'deviceId': None,
            'appver': '8.0.0',
            'versioncode': '140',
            'mobilename': None,
            'buildver': '1623435496',
            'resolution': '1920x1080',
            '__csrf': '',
            'os': 'pc',
            'channel': None,
            'requestId': '{0}_{1:04}'.format(now, rand),
        }
        request_text = json.dumps(
            {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
            separators=(',', ':'))
        message = 'nobody{0}use{1}md5forencrypt'.format(
            URL, request_text).encode('latin1')
        msg_digest = md5(message).hexdigest()

        data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
            URL, request_text, msg_digest)
        data = pkcs7_padding(bytes_to_intlist(data))
        encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
        encrypted_params = hexlify(encrypted).decode('ascii').upper()

        cookie = '; '.join(
            '{0}={1}'.format(k, v if v is not None else 'undefined')
            for k, v in cookie.items())

        headers = {
            # The extractor itself exposes get_param(); there is no
            # `extractor` attribute on an InfoExtractor instance.
            'User-Agent': self.get_param('http_headers')['User-Agent'],
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': 'https://music.163.com',
            'Cookie': cookie,
        }
        return ('params={0}'.format(encrypted_params), headers)

    def _call_player_api(self, song_id, bitrate):
        """Query the player URL API; return the parsed JSON or ``{}`` on failure.

        Failures are reported as warnings, except for errors caused by a
        ValueError/TypeError (JSON load failure), which are re-raised.
        """
        url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
        data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
        msg = 'empty result'
        try:
            result = self._download_json(
                url, song_id, data=data.encode('ascii'), headers=headers)
            if result:
                return result
        except ExtractorError as e:
            # isinstance() so that ValueError subclasses such as
            # json.JSONDecodeError are recognised as JSON load failures.
            if isinstance(e.cause, (ValueError, TypeError)):
                raise
            msg = error_to_compat_str(e)
        except Exception as e:
            msg = error_to_compat_str(e)
        self.report_warning('%s API call (%s) failed: %s' % (
            song_id, bitrate, msg))
        return {}

    def extract_formats(self, info):
        """Return the list of format dicts available for the song in *info*.

        *info* must contain 'id' and may contain any of the ``_FORMATS`` keys.
        Raises ExtractorError (site error code) or a geo-restriction error
        when no usable media URL is found.
        """
        err = 0
        formats = []
        song_id = info['id']
        for song_format in self._FORMATS:
            details = info.get(song_format)
            if not details:
                continue

            bitrate = int_or_none(details.get('bitrate')) or 999000
            data = self._call_player_api(song_id, bitrate)
            for song in try_get(data, lambda x: x['data'], list) or []:
                song_url = try_get(song, lambda x: x['url'])
                if not song_url:
                    continue
                if self._is_valid_url(song_url, song_id, 'song'):
                    formats.append({
                        'url': song_url,
                        'ext': details.get('extension'),
                        'abr': float_or_none(song.get('br'), scale=1000),
                        'format_id': song_format,
                        'filesize': int_or_none(song.get('size')),
                        'asr': int_or_none(details.get('sr')),
                    })
                elif err == 0:
                    # try_get() yields None for a missing/non-int code; keep
                    # err an int so the range check below cannot fail.
                    err = try_get(song, lambda x: x['code'], int) or 0

        if not formats:
            msg = 'No media links found'
            if err != 0 and (err < 200 or err >= 400):
                raise ExtractorError(
                    '%s (site code %d)' % (msg, err, ), expected=True)
            else:
                self.raise_geo_restricted(
                    msg + ': probably this video is not available from your location due to geo restriction.',
                    countries=['CN'])

        return formats

    @classmethod
    def convert_milliseconds(cls, ms):
        """Convert milliseconds to whole seconds (rounded); None stays None."""
        if ms is None:
            return None
        return int(round(ms / 1000.0))

    def query_api(self, endpoint, video_id, note):
        """Fetch ``_API_BASE + endpoint`` as JSON, sending the API base as Referer."""
        req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
        req.add_header('Referer', self._API_BASE)
        return self._download_json(req, video_id, note)
152
153
class NetEaseMusicIE(NetEaseMusicBaseIE):
    """Extractor for a single song page on music.163.com."""

    IE_NAME = 'netease:song'
    IE_DESC = '网易云音乐'
    _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/song?id=32102397',
        'md5': '3e909614ce09b1ccef4a3eb205441190',
        'info_dict': {
            'id': '32102397',
            'ext': 'mp3',
            'title': 'Bad Blood',
            'creator': 'Taylor Swift / Kendrick Lamar',
            'upload_date': '20150516',
            'timestamp': 1431792000,
            'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
        },
    }, {
        'note': 'No lyrics.',
        'url': 'http://music.163.com/song?id=17241424',
        'info_dict': {
            'id': '17241424',
            'ext': 'mp3',
            'title': 'Opus 28',
            'creator': 'Dustin O\'Halloran',
            'upload_date': '20080211',
            'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
            'timestamp': 1202745600,
        },
    }, {
        'note': 'Has translated name.',
        'url': 'http://music.163.com/#/song?id=22735043',
        'info_dict': {
            'id': '22735043',
            'ext': 'mp3',
            'title': '소원을 말해봐 (Genie)',
            'creator': '少女时代',
            'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
            'upload_date': '20100127',
            'timestamp': 1264608000,
            'alt_title': '说出愿望吧(Genie)',
        },
    }, {
        'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
        'md5': '95826c73ea50b1c288b22180ec9e754d',
        'info_dict': {
            'id': '95670',
            'ext': 'mp3',
            'title': '国际歌',
            'creator': '马备',
            'upload_date': '19911130',
            'timestamp': 691516800,
            'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
        },
    }]

    def _process_lyrics(self, lyrics_info):
        """Return the lyrics text, interleaved with its translation if present.

        When both original ('lrc') and translated ('tlyric') lyrics exist,
        each timestamped original line is emitted as
        ``<timestamp><text> / <translated text>``; otherwise the original
        lyrics (possibly None) are returned unchanged.
        """
        # `or {}` also covers keys that are present but set to null.
        original = (lyrics_info.get('lrc') or {}).get('lyric')
        translated = (lyrics_info.get('tlyric') or {}).get('lyric')

        # Nothing to merge unless both sides are available.
        if not original or not translated:
            return original

        lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
        original_ts_texts = re.findall(lyrics_expr, original)
        # findall() yields (timestamp, text) pairs, which dict() takes directly.
        translation_ts_dict = dict(re.findall(lyrics_expr, translated))
        return '\n'.join(
            '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
            for time_stamp, text in original_ts_texts)

    def _real_extract(self, url):
        """Extract metadata, lyrics and formats for the song at *url*."""
        song_id = self._match_id(url)

        params = {
            'id': song_id,
            'ids': '[%s]' % song_id
        }
        info = self.query_api(
            'song/detail?' + compat_urllib_parse_urlencode(params),
            song_id, 'Downloading song info')['songs'][0]

        formats = self.extract_formats(info)

        lyrics_info = self.query_api(
            'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
            song_id, 'Downloading lyrics data')
        lyrics = self._process_lyrics(lyrics_info)

        trans_names = info.get('transNames')
        alt_title = '/'.join(trans_names) if trans_names else None

        # Tolerate 'album'/'artists' being absent or null.
        album = info.get('album') or {}
        artists = info.get('artists') or []

        return {
            'id': song_id,
            'title': info['name'],
            'alt_title': alt_title,
            'creator': ' / '.join([artist['name'] for artist in artists]),
            'timestamp': self.convert_milliseconds(album.get('publishTime')),
            'thumbnail': album.get('picUrl'),
            'duration': self.convert_milliseconds(info.get('duration', 0)),
            'description': lyrics,
            'formats': formats,
        }
260
261
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
    """Extractor that turns an album page into a playlist of song URLs."""

    IE_NAME = 'netease:album'
    IE_DESC = '网易云音乐 - 专辑'
    _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/album?id=220780',
        'info_dict': {
            'id': '220780',
            'title': 'B\'day',
        },
        'playlist_count': 23,
        'skip': 'Blocked outside Mainland China',
    }

    def _real_extract(self, url):
        """Return a playlist result with one entry per song of the album."""
        album_id = self._match_id(url)

        endpoint = 'album/%s?id=%s' % (album_id, album_id)
        album = self.query_api(endpoint, album_id, 'Downloading album data')['album']

        title = album['name']
        description = album.get('description')

        entries = []
        for track in album['songs']:
            track_url = 'http://music.163.com/#/song?id=%s' % track['id']
            entries.append(self.url_result(track_url, 'NetEaseMusic', track['id']))

        return self.playlist_result(entries, album_id, title, description)
291
292
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
    """Extractor that turns an artist page into a playlist of its hot songs."""

    IE_NAME = 'netease:singer'
    IE_DESC = '网易云音乐 - 歌手'
    _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'note': 'Singer has aliases.',
        'url': 'http://music.163.com/#/artist?id=10559',
        'info_dict': {
            'id': '10559',
            'title': '张惠妹 - aMEI;阿密特',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Singer has translated name.',
        'url': 'http://music.163.com/#/artist?id=124098',
        'info_dict': {
            'id': '124098',
            'title': '李昇基 - 이승기',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Return a playlist of the artist's 'hotSongs', titled with the
        artist name plus its translation and aliases when present."""
        singer_id = self._match_id(url)

        endpoint = 'artist/%s?id=%s' % (singer_id, singer_id)
        info = self.query_api(endpoint, singer_id, 'Downloading singer data')

        # Build "<name>[ - <translation>][ - <alias1;alias2;...>]".
        title = info['artist']['name']
        if info['artist']['trans']:
            title = '%s - %s' % (title, info['artist']['trans'])
        if info['artist']['alias']:
            aliases = ';'.join(info['artist']['alias'])
            title = '%s - %s' % (title, aliases)

        entries = []
        for track in info['hotSongs']:
            track_url = 'http://music.163.com/#/song?id=%s' % track['id']
            entries.append(self.url_result(track_url, 'NetEaseMusic', track['id']))

        return self.playlist_result(entries, singer_id, title)
336
337
class NetEaseMusicListIE(NetEaseMusicBaseIE):
    """Extractor for user playlists and toplist/chart pages."""

    IE_NAME = 'netease:playlist'
    IE_DESC = '网易云音乐 - 歌单'
    _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/playlist?id=79177352',
        'info_dict': {
            'id': '79177352',
            'title': 'Billboard 2007 Top 100',
            'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
        },
        'playlist_count': 99,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Toplist/Charts sample',
        'url': 'http://music.163.com/#/discover/toplist?id=3733003',
        'info_dict': {
            'id': '3733003',
            'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
            'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Return a playlist result with one entry per track of the list."""
        list_id = self._match_id(url)

        endpoint = 'playlist/detail?id=%s&lv=-1&tv=-1' % list_id
        playlist = self.query_api(
            endpoint, list_id, 'Downloading playlist data')['result']

        title = playlist['name']
        description = playlist.get('description')

        if playlist.get('specialType') == 10:  # is a chart/toplist
            # Charts get their last update date appended to the title.
            updated_at = datetime.fromtimestamp(
                self.convert_milliseconds(playlist['updateTime']))
            title = '%s %s' % (title, updated_at.strftime('%Y-%m-%d'))

        entries = []
        for track in playlist['tracks']:
            track_url = 'http://music.163.com/#/song?id=%s' % track['id']
            entries.append(self.url_result(track_url, 'NetEaseMusic', track['id']))

        return self.playlist_result(entries, list_id, title, description)
384
385
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
    """Extractor for music video (MV) pages."""

    IE_NAME = 'netease:mv'
    IE_DESC = '网易云音乐 - MV'
    _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/mv?id=415350',
        'info_dict': {
            'id': '415350',
            'ext': 'mp4',
            'title': '이럴거면 그러지말지',
            'description': '白雅言自作曲唱甜蜜爱情',
            'creator': '白雅言',
            'upload_date': '20150520',
        },
        'skip': 'Blocked outside Mainland China',
    }

    def _real_extract(self, url):
        """Return the info dict for the music video at *url*."""
        mv_id = self._match_id(url)

        endpoint = 'mv/detail?id=%s&type=mp4' % mv_id
        mv = self.query_api(endpoint, mv_id, 'Downloading mv info')['data']

        # 'brs' maps a resolution key to its media URL; the key is used
        # both in the format id ("<key>p") and, as an int, for the height.
        formats = []
        for resolution, media_url in mv['brs'].items():
            formats.append({
                'url': media_url,
                'ext': 'mp4',
                'format_id': '%sp' % resolution,
                'height': int(resolution),
            })

        return {
            'id': mv_id,
            'title': mv['name'],
            'description': mv.get('desc') or mv.get('briefDesc'),
            'creator': mv['artistName'],
            'upload_date': mv['publishTime'].replace('-', ''),
            'formats': formats,
            'thumbnail': mv.get('cover'),
            'duration': self.convert_milliseconds(mv.get('duration', 0)),
        }
425
426
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
    """Extractor for radio program pages.

    A program yields either its main song as a single entry, or a playlist
    of the main song followed by the program's accompanying songs.
    """

    IE_NAME = 'netease:program'
    IE_DESC = '网易云音乐 - 电台节目'
    # The '#/' fragment prefix is optional, as in the sibling extractors
    # (the previous pattern made the '#' mandatory).
    _VALID_URL = r'https?://music\.163\.com/(?:#/?)?program\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/program?id=10109055',
        'info_dict': {
            'id': '10109055',
            'ext': 'mp3',
            'title': '不丹足球背后的故事',
            'description': '喜马拉雅人的足球梦 ...',
            'creator': '大话西藏',
            'timestamp': 1434179342,
            'upload_date': '20150613',
            'duration': 900,
        },
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
        },
        'playlist_count': 4,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'ext': 'mp3',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
            'timestamp': 1434450841,
            'upload_date': '20150616',
        },
        'params': {
            'noplaylist': True
        },
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Return the main song's info dict or a playlist of all program songs."""
        program_id = self._match_id(url)

        info = self.query_api(
            'dj/program/detail?id=%s' % program_id,
            program_id, 'Downloading program info')['program']

        name = info['name']
        description = info['description']

        # The playlist id passed here is falsy when the program has no
        # accompanying songs (`info['songs'] and program_id`).
        if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
            formats = self.extract_formats(info['mainSong'])

            return {
                'id': info['mainSong']['id'],
                'title': name,
                'description': description,
                'creator': info['dj']['brand'],
                'timestamp': self.convert_milliseconds(info['createTime']),
                'thumbnail': info['coverUrl'],
                'duration': self.convert_milliseconds(info.get('duration', 0)),
                'formats': formats,
            }

        # Main song first, then the accompanying songs.
        song_ids = [info['mainSong']['id']]
        song_ids.extend([song['id'] for song in info['songs']])
        entries = [
            self.url_result('http://music.163.com/#/song?id=%s' % song_id,
                            'NetEaseMusic', song_id)
            for song_id in song_ids
        ]
        return self.playlist_result(entries, program_id, name, description)
503
504
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
    """Extractor that turns a DJ radio page into a playlist of its programs."""

    IE_NAME = 'netease:djradio'
    IE_DESC = '网易云音乐 - 电台'
    _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/djradio?id=42',
        'info_dict': {
            'id': '42',
            'title': '声音蔓延',
            'description': 'md5:766220985cbd16fdd552f64c578a6b15'
        },
        'playlist_mincount': 40,
        'skip': 'Blocked outside Mainland China',
    }
    # Number of programs requested per API call (the `limit` query value).
    _PAGE_SIZE = 1000

    def _real_extract(self, url):
        """Page through the radio's programs and return them as a playlist.

        The playlist title and description are taken from the 'radio' entry
        of the first program returned; both stay None if the radio has no
        programs.
        """
        dj_id = self._match_id(url)

        name = None
        desc = None
        entries = []
        for offset in itertools.count(start=0, step=self._PAGE_SIZE):
            info = self.query_api(
                'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
                % (self._PAGE_SIZE, dj_id, offset),
                dj_id, 'Downloading dj programs - %d' % offset)

            programs = info['programs']
            entries.extend([
                self.url_result(
                    'http://music.163.com/#/program?id=%s' % program['id'],
                    'NetEaseMusicProgram', program['id'])
                for program in programs
            ])

            # Guard against an empty page: indexing programs[0] would raise.
            if name is None and programs:
                radio = programs[0]['radio']
                name = radio['name']
                desc = radio['desc']

            # Stop on the last page, and also on an empty page so a
            # misreported 'more' flag cannot loop forever.
            if not programs or not info['more']:
                break

        return self.playlist_result(entries, dj_id, name, desc)