]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/neteasemusic.py
[cleanup] Lint and misc cleanup
[yt-dlp.git] / yt_dlp / extractor / neteasemusic.py
CommitLineData
46d09f87 1import itertools
db4678e4 2import json
ac668111 3import re
db4678e4 4import time
02b386f8 5from base64 import b64encode
db4678e4 6from binascii import hexlify
af1fa623 7from datetime import datetime
ac668111 8from hashlib import md5
db4678e4 9from random import randint
af1fa623 10
11from .common import InfoExtractor
db4678e4 12from ..aes import aes_ecb_encrypt, pkcs7_padding
13from ..compat import compat_urllib_parse_urlencode
14from ..utils import (
15 ExtractorError,
16 bytes_to_intlist,
17 error_to_compat_str,
18 float_or_none,
19 int_or_none,
20 intlist_to_bytes,
21 sanitized_Request,
22 try_get,
23)
af1fa623 24
25
class NetEaseMusicBaseIE(InfoExtractor):
    """Helpers shared by the NetEase Cloud Music (music.163.com) extractors."""

    # Keys looked up in a song's info dict by extract_formats().
    _FORMATS = ['bMusic', 'mMusic', 'hMusic']
    # Salt XORed into the input of _encrypt().
    _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
    # Prefix prepended to every endpoint passed to query_api().
    _API_BASE = 'http://music.163.com/api/'

    @classmethod
    def _encrypt(cls, dfsid):
        """Return a URL-safe base64 MD5 digest of *dfsid* XORed with the salt.

        The string form of *dfsid* is XORed byte by byte with the (repeating)
        salt, hashed with MD5 and base64-encoded; '/' and '+' are then
        replaced by '_' and '-'.
        """
        salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
        string_bytes = bytearray(str(dfsid).encode('ascii'))
        salt_len = len(salt_bytes)
        for i, byte in enumerate(string_bytes):
            string_bytes[i] = byte ^ salt_bytes[i % salt_len]
        result = b64encode(md5(bytes(string_bytes)).digest()).decode('ascii')
        return result.replace('/', '_').replace('+', '-')

    def make_player_api_request_data_and_headers(self, song_id, bitrate):
        """Build the encrypted body and the headers for a player-URL request.

        Returns a ``(body, headers)`` tuple. ``body`` is the string
        ``params=<HEX>`` where ``<HEX>`` is the upper-case hex encoding of the
        AES-ECB encrypted request; ``headers`` is a dict including the cookie
        string derived from the same values sent in the request's ``header``.
        """
        KEY = b'e82ckenh8dichen8'
        URL = '/api/song/enhance/player/url'
        now = int(time.time() * 1000)
        rand = randint(0, 1000)
        cookie = {
            'osver': None,
            'deviceId': None,
            'appver': '8.0.0',
            'versioncode': '140',
            'mobilename': None,
            'buildver': '1623435496',
            'resolution': '1920x1080',
            '__csrf': '',
            'os': 'pc',
            'channel': None,
            'requestId': '{0}_{1:04}'.format(now, rand),
        }
        request_text = json.dumps(
            {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
            separators=(',', ':'))
        message = 'nobody{0}use{1}md5forencrypt'.format(
            URL, request_text).encode('latin1')
        msg_digest = md5(message).hexdigest()

        data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
            URL, request_text, msg_digest)
        data = pkcs7_padding(bytes_to_intlist(data))
        encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
        encrypted_params = hexlify(encrypted).decode('ascii').upper()

        # Unset values are sent as the literal text 'undefined'.
        cookie = '; '.join(
            '{0}={1}'.format(k, v if v is not None else 'undefined')
            for k, v in cookie.items())

        headers = {
            # The extractor itself exposes get_param(); this class defines no
            # 'extractor' attribute, so going through one raised AttributeError.
            'User-Agent': self.get_param('http_headers')['User-Agent'],
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': 'https://music.163.com',
            'Cookie': cookie,
        }
        return ('params={0}'.format(encrypted_params), headers)

    def _call_player_api(self, song_id, bitrate):
        """Query the player-URL API for *song_id* at *bitrate*.

        Returns the decoded JSON response, or an empty dict (after emitting a
        warning) when the response is empty or the request fails. An
        ExtractorError caused by a ValueError/TypeError (JSON load failure) is
        re-raised.
        """
        url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
        data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
        msg = 'empty result'
        try:
            result = self._download_json(
                url, song_id, data=data.encode('ascii'), headers=headers)
            if result:
                return result
        except ExtractorError as e:
            # isinstance() so that subclasses such as json.JSONDecodeError
            # are recognised as well.
            if isinstance(e.cause, (ValueError, TypeError)):
                # JSON load failure
                raise
            msg = error_to_compat_str(e)
        except Exception as e:
            msg = error_to_compat_str(e)
        # Reached for an empty result as well as for any swallowed error.
        self.report_warning('%s API call (%s) failed: %s' % (
            song_id, bitrate, msg))
        return {}

    def extract_formats(self, info):
        """Collect downloadable formats for the song described by *info*.

        For every key of ``_FORMATS`` present in *info* the player API is
        queried and each returned URL that passes ``_is_valid_url`` becomes a
        format dict. When nothing is found, an ExtractorError carrying the
        site's error code is raised if that code is outside 200-399; otherwise
        ``raise_geo_restricted`` is called.
        """
        err = 0
        formats = []
        song_id = info['id']
        for song_format in self._FORMATS:
            details = info.get(song_format)
            if not details:
                continue

            bitrate = int_or_none(details.get('bitrate')) or 999000
            data = self._call_player_api(song_id, bitrate)
            for song in try_get(data, lambda x: x['data'], list) or []:
                song_url = try_get(song, lambda x: x['url'])
                if not song_url:
                    continue
                if self._is_valid_url(song_url, info['id'], 'song'):
                    formats.append({
                        'url': song_url,
                        'ext': details.get('extension'),
                        'abr': float_or_none(song.get('br'), scale=1000),
                        'format_id': song_format,
                        'filesize': int_or_none(song.get('size')),
                        'asr': int_or_none(details.get('sr')),
                    })
                elif err == 0:
                    # Remember the first site code; fall back to 0 when the
                    # entry has no integer code so the comparison below never
                    # sees None.
                    err = try_get(song, lambda x: x['code'], int) or 0

        if not formats:
            msg = 'No media links found'
            if err != 0 and (err < 200 or err >= 400):
                raise ExtractorError(
                    '%s (site code %d)' % (msg, err, ), expected=True)
            else:
                self.raise_geo_restricted(
                    msg + ': probably this video is not available from your location due to geo restriction.',
                    countries=['CN'])

        return formats

    @classmethod
    def convert_milliseconds(cls, ms):
        """Convert *ms* milliseconds to whole seconds (rounded).

        Returns None when *ms* is None, so that optional metadata fields can
        be passed through without a prior check.
        """
        if ms is None:
            return None
        return int(round(ms / 1000.0))

    def query_api(self, endpoint, video_id, note):
        """Download JSON from ``_API_BASE + endpoint`` with a Referer header."""
        req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
        req.add_header('Referer', self._API_BASE)
        return self._download_json(req, video_id, note)
152
153
class NetEaseMusicIE(NetEaseMusicBaseIE):
    """Extractor for single song pages."""

    IE_NAME = 'netease:song'
    IE_DESC = '网易云音乐'
    _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/song?id=32102397',
        'md5': '3e909614ce09b1ccef4a3eb205441190',
        'info_dict': {
            'id': '32102397',
            'ext': 'mp3',
            'title': 'Bad Blood',
            'creator': 'Taylor Swift / Kendrick Lamar',
            'upload_date': '20150516',
            'timestamp': 1431792000,
            'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
        },
    }, {
        'note': 'No lyrics.',
        'url': 'http://music.163.com/song?id=17241424',
        'info_dict': {
            'id': '17241424',
            'ext': 'mp3',
            'title': 'Opus 28',
            'creator': 'Dustin O\'Halloran',
            'upload_date': '20080211',
            'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
            'timestamp': 1202745600,
        },
    }, {
        'note': 'Has translated name.',
        'url': 'http://music.163.com/#/song?id=22735043',
        'info_dict': {
            'id': '22735043',
            'ext': 'mp3',
            'title': '소원을 말해봐 (Genie)',
            'creator': '少女时代',
            'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
            'upload_date': '20100127',
            'timestamp': 1264608000,
            'alt_title': '说出愿望吧(Genie)',
        },
    }, {
        'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
        'md5': '95826c73ea50b1c288b22180ec9e754d',
        'info_dict': {
            'id': '95670',
            'ext': 'mp3',
            'title': '国际歌',
            'creator': '马备',
            'upload_date': '19911130',
            'timestamp': 691516800,
            'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
        },
    }]

    def _process_lyrics(self, lyrics_info):
        """Merge original and translated lyrics into one text.

        Returns the original lyrics unchanged when either the original or the
        translation is missing or empty. Otherwise every timestamped line of
        the original is emitted as ``<timestamp><text> / <translation>``,
        with an empty translation when no line shares that timestamp.
        """
        # try_get tolerates a missing or null 'lrc'/'tlyric' entry.
        original = try_get(lyrics_info, lambda x: x['lrc']['lyric'])
        translated = try_get(lyrics_info, lambda x: x['tlyric']['lyric'])

        # Without an original there is nothing to pair the translation with.
        if not original or not translated:
            return original

        lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
        original_ts_texts = re.findall(lyrics_expr, original)
        translation_ts_dict = dict(re.findall(lyrics_expr, translated))
        return '\n'.join(
            '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
            for time_stamp, text in original_ts_texts)

    def _real_extract(self, url):
        """Return the info dict (metadata, lyrics, formats) for a song URL."""
        song_id = self._match_id(url)

        params = {
            'id': song_id,
            'ids': '[%s]' % song_id
        }
        info = self.query_api(
            'song/detail?' + compat_urllib_parse_urlencode(params),
            song_id, 'Downloading song info')['songs'][0]

        formats = self.extract_formats(info)
        self._sort_formats(formats)

        lyrics_info = self.query_api(
            'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
            song_id, 'Downloading lyrics data')
        lyrics = self._process_lyrics(lyrics_info)

        alt_title = None
        if info.get('transNames'):
            alt_title = '/'.join(info.get('transNames'))

        # 'album' and its 'publishTime' are optional; avoid feeding None
        # into the millisecond conversion.
        album = info.get('album') or {}
        publish_time = album.get('publishTime')
        timestamp = None
        if publish_time is not None:
            timestamp = self.convert_milliseconds(publish_time)

        return {
            'id': song_id,
            'title': info['name'],
            'alt_title': alt_title,
            'creator': ' / '.join([artist['name'] for artist in info.get('artists') or []]),
            'timestamp': timestamp,
            'thumbnail': album.get('picUrl'),
            'duration': self.convert_milliseconds(info.get('duration') or 0),
            'description': lyrics,
            'formats': formats,
        }
261
262
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
    """Extractor for album pages; yields one playlist entry per song."""

    IE_NAME = 'netease:album'
    IE_DESC = '网易云音乐 - 专辑'
    _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/album?id=220780',
        'info_dict': {
            'id': '220780',
            'title': 'B\'day',
        },
        'playlist_count': 23,
        'skip': 'Blocked outside Mainland China',
    }

    def _real_extract(self, url):
        """Fetch the album metadata and return its songs as a playlist."""
        album_id = self._match_id(url)

        endpoint = 'album/%s?id=%s' % (album_id, album_id)
        album = self.query_api(
            endpoint, album_id, 'Downloading album data')['album']

        title = album['name']
        description = album.get('description')

        # One url_result per song, delegated to the song extractor.
        entries = []
        for track in album['songs']:
            track_id = track['id']
            entries.append(self.url_result(
                'http://music.163.com/#/song?id=%s' % track_id,
                'NetEaseMusic', track_id))

        return self.playlist_result(entries, album_id, title, description)
292
293
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
    """Extractor for artist pages; yields the artist's 'hotSongs' list."""

    IE_NAME = 'netease:singer'
    IE_DESC = '网易云音乐 - 歌手'
    _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'note': 'Singer has aliases.',
        'url': 'http://music.163.com/#/artist?id=10559',
        'info_dict': {
            'id': '10559',
            'title': '张惠妹 - aMEI;阿密特',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Singer has translated name.',
        'url': 'http://music.163.com/#/artist?id=124098',
        'info_dict': {
            'id': '124098',
            'title': '李昇基 - 이승기',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Return a playlist titled with the artist's name, translation and aliases."""
        singer_id = self._match_id(url)

        endpoint = 'artist/%s?id=%s' % (singer_id, singer_id)
        info = self.query_api(endpoint, singer_id, 'Downloading singer data')

        artist = info['artist']
        title = artist['name']

        # Append the translated name first, then the ';'-joined aliases.
        translation = artist['trans']
        if translation:
            title = '%s - %s' % (title, translation)
        aliases = artist['alias']
        if aliases:
            title = '%s - %s' % (title, ';'.join(aliases))

        entries = []
        for track in info['hotSongs']:
            track_id = track['id']
            entries.append(self.url_result(
                'http://music.163.com/#/song?id=%s' % track_id,
                'NetEaseMusic', track_id))

        return self.playlist_result(entries, singer_id, title)
337
338
class NetEaseMusicListIE(NetEaseMusicBaseIE):
    """Extractor for user playlists and toplist (chart) pages."""

    IE_NAME = 'netease:playlist'
    IE_DESC = '网易云音乐 - 歌单'
    _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/playlist?id=79177352',
        'info_dict': {
            'id': '79177352',
            'title': 'Billboard 2007 Top 100',
            'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
        },
        'playlist_count': 99,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Toplist/Charts sample',
        'url': 'http://music.163.com/#/discover/toplist?id=3733003',
        'info_dict': {
            'id': '3733003',
            'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
            'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Return the playlist's tracks; charts get their update date appended to the title."""
        list_id = self._match_id(url)

        endpoint = 'playlist/detail?id=%s&lv=-1&tv=-1' % list_id
        playlist = self.query_api(
            endpoint, list_id, 'Downloading playlist data')['result']

        title = playlist['name']
        description = playlist.get('description')

        is_chart = playlist.get('specialType') == 10  # is a chart/toplist
        if is_chart:
            updated_at = datetime.fromtimestamp(
                self.convert_milliseconds(playlist['updateTime']))
            title = '%s %s' % (title, updated_at.strftime('%Y-%m-%d'))

        entries = []
        for track in playlist['tracks']:
            track_id = track['id']
            entries.append(self.url_result(
                'http://music.163.com/#/song?id=%s' % track_id,
                'NetEaseMusic', track_id))

        return self.playlist_result(entries, list_id, title, description)
385
386
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
    """Extractor for music video (MV) pages."""

    IE_NAME = 'netease:mv'
    IE_DESC = '网易云音乐 - MV'
    _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/mv?id=415350',
        'info_dict': {
            'id': '415350',
            'ext': 'mp4',
            'title': '이럴거면 그러지말지',
            'description': '白雅言自作曲唱甜蜜爱情',
            'creator': '白雅言',
            'upload_date': '20150520',
        },
        'skip': 'Blocked outside Mainland China',
    }

    def _real_extract(self, url):
        """Return the info dict for an MV, one mp4 format per entry in 'brs'."""
        mv_id = self._match_id(url)

        endpoint = 'mv/detail?id=%s&type=mp4' % mv_id
        details = self.query_api(endpoint, mv_id, 'Downloading mv info')['data']

        # Each key of 'brs' is used both as the format label and as the height.
        formats = []
        for resolution, video_url in details['brs'].items():
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': '%sp' % resolution,
                'height': int(resolution),
            })
        self._sort_formats(formats)

        title = details['name']
        description = details.get('desc') or details.get('briefDesc')
        creator = details['artistName']
        upload_date = details['publishTime'].replace('-', '')
        thumbnail = details.get('cover')
        duration = self.convert_milliseconds(details.get('duration', 0))

        return {
            'id': mv_id,
            'title': title,
            'description': description,
            'creator': creator,
            'upload_date': upload_date,
            'formats': formats,
            'thumbnail': thumbnail,
            'duration': duration,
        }
427
428
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
    """Extractor for radio program pages (a main song plus optional extra songs)."""

    IE_NAME = 'netease:program'
    IE_DESC = '网易云音乐 - 电台节目'
    # The '#/' fragment prefix is optional, as in the sibling extractors; the
    # previous pattern '(#/?)' made the '#' mandatory and rejected plain
    # '/program?id=...' URLs. '#program' (no slash) is still accepted.
    _VALID_URL = r'https?://music\.163\.com/(#/?)?program\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/program?id=10109055',
        'info_dict': {
            'id': '10109055',
            'ext': 'mp3',
            'title': '不丹足球背后的故事',
            'description': '喜马拉雅人的足球梦 ...',
            'creator': '大话西藏',
            'timestamp': 1434179342,
            'upload_date': '20150613',
            'duration': 900,
        },
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
        },
        'playlist_count': 4,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'ext': 'mp3',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
            'timestamp': 1434450841,
            'upload_date': '20150616',
        },
        'params': {
            'noplaylist': True
        },
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Return either the program's main song or a playlist of all its songs.

        The decision is delegated to ``_yes_playlist``: when it returns a
        falsy value only the main song is extracted; otherwise the main song
        followed by the accompanying songs is returned as a playlist.
        """
        program_id = self._match_id(url)

        info = self.query_api(
            'dj/program/detail?id=%s' % program_id,
            program_id, 'Downloading program info')['program']

        name = info['name']
        description = info['description']

        # The first argument is falsy when the program has no accompanying
        # songs, in which case there is no playlist to offer.
        if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
            formats = self.extract_formats(info['mainSong'])
            self._sort_formats(formats)

            return {
                'id': info['mainSong']['id'],
                'title': name,
                'description': description,
                'creator': info['dj']['brand'],
                'timestamp': self.convert_milliseconds(info['createTime']),
                'thumbnail': info['coverUrl'],
                'duration': self.convert_milliseconds(info.get('duration', 0)),
                'formats': formats,
            }

        song_ids = [info['mainSong']['id']]
        song_ids.extend([song['id'] for song in info['songs']])
        entries = [
            self.url_result('http://music.163.com/#/song?id=%s' % song_id,
                            'NetEaseMusic', song_id)
            for song_id in song_ids
        ]
        return self.playlist_result(entries, program_id, name, description)
506
507
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
    """Extractor for DJ radio pages; yields one playlist entry per program."""

    IE_NAME = 'netease:djradio'
    IE_DESC = '网易云音乐 - 电台'
    _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/djradio?id=42',
        'info_dict': {
            'id': '42',
            'title': '声音蔓延',
            'description': 'md5:766220985cbd16fdd552f64c578a6b15'
        },
        'playlist_mincount': 40,
        'skip': 'Blocked outside Mainland China',
    }
    # Number of programs requested per API call (the 'limit' query parameter).
    _PAGE_SIZE = 1000

    def _real_extract(self, url):
        """Page through the radio's programs and return them as a playlist.

        The playlist title and description are taken from the 'radio' entry
        of the first program seen; both stay None when the radio has no
        programs at all.
        """
        dj_id = self._match_id(url)

        name = None
        desc = None
        entries = []
        for offset in itertools.count(start=0, step=self._PAGE_SIZE):
            info = self.query_api(
                'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
                % (self._PAGE_SIZE, dj_id, offset),
                dj_id, 'Downloading dj programs - %d' % offset)

            programs = info['programs']
            entries.extend([
                self.url_result(
                    'http://music.163.com/#/program?id=%s' % program['id'],
                    'NetEaseMusicProgram', program['id'])
                for program in programs
            ])

            # Guard against an empty page: indexing [0] would raise IndexError.
            if name is None and programs:
                radio = programs[0]['radio']
                name = radio['name']
                desc = radio['desc']

            # Also stop on an empty page so a stuck 'more' flag cannot make
            # the loop run forever.
            if not info['more'] or not programs:
                break

        return self.playlist_result(entries, dj_id, name, desc)