]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/neteasemusic.py
[cleanup] Lint and misc cleanup
[yt-dlp.git] / yt_dlp / extractor / neteasemusic.py
... / ...
CommitLineData
1import itertools
2import json
3import re
4import time
5from base64 import b64encode
6from binascii import hexlify
7from datetime import datetime
8from hashlib import md5
9from random import randint
10
11from .common import InfoExtractor
12from ..aes import aes_ecb_encrypt, pkcs7_padding
13from ..compat import compat_urllib_parse_urlencode
14from ..utils import (
15 ExtractorError,
16 bytes_to_intlist,
17 error_to_compat_str,
18 float_or_none,
19 int_or_none,
20 intlist_to_bytes,
21 sanitized_Request,
22 try_get,
23)
24
25
class NetEaseMusicBaseIE(InfoExtractor):
    """Common machinery shared by the NetEase Cloud Music extractors.

    Provides the request signing/encryption used for the player-URL API,
    format extraction from a song's metadata dict, and a thin wrapper around
    the plain JSON API rooted at ``_API_BASE``.
    """

    # Keys probed on a song's metadata dict by extract_formats(); each entry
    # found there supplies the bitrate, extension and sample rate of one format.
    _FORMATS = ['bMusic', 'mMusic', 'hMusic']
    _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
    _API_BASE = 'http://music.163.com/api/'

    @classmethod
    def _encrypt(cls, dfsid):
        """Return a URL-safe base64 MD5 digest of *dfsid* XOR-ed with the salt.

        *dfsid* is stringified and must be ASCII-encodable. The ``/`` and ``+``
        characters of the base64 output are replaced by ``_`` and ``-``.
        """
        salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
        salt_len = len(salt_bytes)
        # The salt is repeated cyclically over the whole input
        xored = bytearray(
            byte ^ salt_bytes[i % salt_len]
            for i, byte in enumerate(bytearray(str(dfsid).encode('ascii'))))
        result = b64encode(md5(bytes(xored)).digest()).decode('ascii')
        return result.replace('/', '_').replace('+', '-')

    def make_player_api_request_data_and_headers(self, song_id, bitrate):
        """Build the encrypted form body and the headers for the player-URL API.

        Returns a ``(data, headers)`` tuple where *data* is the string
        ``'params=<UPPERCASE HEX>'`` (the AES-ECB encrypted request) and
        *headers* is a dict of HTTP headers including the ``Cookie``.
        """
        KEY = b'e82ckenh8dichen8'
        URL = '/api/song/enhance/player/url'
        now = int(time.time() * 1000)
        rand = randint(0, 1000)
        cookie = {
            'osver': None,
            'deviceId': None,
            'appver': '8.0.0',
            'versioncode': '140',
            'mobilename': None,
            'buildver': '1623435496',
            'resolution': '1920x1080',
            '__csrf': '',
            'os': 'pc',
            'channel': None,
            'requestId': '{0}_{1:04}'.format(now, rand),
        }
        # Compact separators: the exact serialized text is part of what gets hashed below
        request_text = json.dumps(
            {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
            separators=(',', ':'))
        message = 'nobody{0}use{1}md5forencrypt'.format(
            URL, request_text).encode('latin1')
        msg_digest = md5(message).hexdigest()

        data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
            URL, request_text, msg_digest)
        data = pkcs7_padding(bytes_to_intlist(data))
        encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
        encrypted_params = hexlify(encrypted).decode('ascii').upper()

        # In the Cookie header, unset values are spelled 'undefined'
        # (in the JSON payload above they serialize as null)
        cookie = '; '.join(
            '{0}={1}'.format(k, v if v is not None else 'undefined')
            for k, v in cookie.items())

        headers = {
            # The extractor itself exposes get_param(); there is no
            # `extractor` attribute to go through
            'User-Agent': self.get_param('http_headers')['User-Agent'],
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': 'https://music.163.com',
            'Cookie': cookie,
        }
        return ('params={0}'.format(encrypted_params), headers)

    def _call_player_api(self, song_id, bitrate):
        """Query the player-URL endpoint for *song_id* at *bitrate*.

        Returns the decoded JSON response. When the call fails or the result
        is empty, a warning is reported and an empty dict is returned instead.

        Raises ExtractorError when its cause is a ValueError/TypeError, i.e.
        when the response could not be loaded as JSON.
        """
        url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
        data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
        msg = 'empty result'
        try:
            result = self._download_json(
                url, song_id, data=data.encode('ascii'), headers=headers)
            if result:
                return result
        except ExtractorError as e:
            # isinstance rather than an exact type match, so that subclasses
            # such as json.JSONDecodeError count as a JSON load failure
            if isinstance(e.cause, (ValueError, TypeError)):
                raise
            msg = error_to_compat_str(e)
        except Exception as e:
            msg = error_to_compat_str(e)
        self.report_warning('%s API call (%s) failed: %s' % (
            song_id, bitrate, msg))
        return {}

    def extract_formats(self, info):
        """Return the list of format dicts for the song described by *info*.

        *info* must contain ``'id'``; each key of ``_FORMATS`` present in it
        triggers one player-API call.

        Raises ExtractorError (expected) when no usable URL was found and the
        site reported a code outside 200-399; otherwise reports the song as
        geo-restricted via raise_geo_restricted().
        """
        err = 0
        formats = []
        song_id = info['id']
        for song_format in self._FORMATS:
            details = info.get(song_format)
            if not details:
                continue

            bitrate = int_or_none(details.get('bitrate')) or 999000
            data = self._call_player_api(song_id, bitrate)
            for song in try_get(data, lambda x: x['data'], list) or []:
                song_url = try_get(song, lambda x: x['url'])
                if not song_url:
                    continue
                if self._is_valid_url(song_url, song_id, 'song'):
                    formats.append({
                        'url': song_url,
                        'ext': details.get('extension'),
                        'abr': float_or_none(song.get('br'), scale=1000),
                        'format_id': song_format,
                        'filesize': int_or_none(song.get('size')),
                        'asr': int_or_none(details.get('sr')),
                    })
                elif err == 0:
                    # Remember the first site code only; a missing or
                    # non-integer code must stay 0 so the numeric
                    # comparisons below cannot hit None
                    err = try_get(song, lambda x: x['code'], int) or 0

        if not formats:
            msg = 'No media links found'
            if err != 0 and (err < 200 or err >= 400):
                raise ExtractorError(
                    '%s (site code %d)' % (msg, err), expected=True)
            else:
                self.raise_geo_restricted(
                    msg + ': probably this video is not available from your location due to geo restriction.',
                    countries=['CN'])

        return formats

    @classmethod
    def convert_milliseconds(cls, ms):
        """Convert *ms* milliseconds to whole seconds (rounded).

        Returns None when *ms* is None so that absent API fields do not
        abort extraction.
        """
        if ms is None:
            return None
        return int(round(ms / 1000.0))

    def query_api(self, endpoint, video_id, note):
        """Download and return the JSON at ``_API_BASE + endpoint``.

        The request carries ``_API_BASE`` as its Referer header; *video_id*
        and *note* are passed through to _download_json().
        """
        req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
        req.add_header('Referer', self._API_BASE)
        return self._download_json(req, video_id, note)
152
153
class NetEaseMusicIE(NetEaseMusicBaseIE):
    """Extractor for a single song page on music.163.com."""

    IE_NAME = 'netease:song'
    IE_DESC = '网易云音乐'
    _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/song?id=32102397',
        'md5': '3e909614ce09b1ccef4a3eb205441190',
        'info_dict': {
            'id': '32102397',
            'ext': 'mp3',
            'title': 'Bad Blood',
            'creator': 'Taylor Swift / Kendrick Lamar',
            'upload_date': '20150516',
            'timestamp': 1431792000,
            'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
        },
    }, {
        'note': 'No lyrics.',
        'url': 'http://music.163.com/song?id=17241424',
        'info_dict': {
            'id': '17241424',
            'ext': 'mp3',
            'title': 'Opus 28',
            'creator': 'Dustin O\'Halloran',
            'upload_date': '20080211',
            'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
            'timestamp': 1202745600,
        },
    }, {
        'note': 'Has translated name.',
        'url': 'http://music.163.com/#/song?id=22735043',
        'info_dict': {
            'id': '22735043',
            'ext': 'mp3',
            'title': '소원을 말해봐 (Genie)',
            'creator': '少女时代',
            'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
            'upload_date': '20100127',
            'timestamp': 1264608000,
            'alt_title': '说出愿望吧(Genie)',
        },
    }, {
        'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
        'md5': '95826c73ea50b1c288b22180ec9e754d',
        'info_dict': {
            'id': '95670',
            'ext': 'mp3',
            'title': '国际歌',
            'creator': '马备',
            'upload_date': '19911130',
            'timestamp': 691516800,
            'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
        },
    }]

    def _process_lyrics(self, lyrics_info):
        """Return the lyrics text, merged with the translation when there is one.

        *lyrics_info* is the lyric API response; its ``'lrc'`` and ``'tlyric'``
        entries (each optionally holding a ``'lyric'`` string) are read.
        Without a translation the original lyric is returned unchanged (it may
        be None or empty). With both present, every timestamped original line
        becomes ``'<timestamp><text> / <translated text>'``.
        """
        # `or {}` also covers keys that are present but null
        original = (lyrics_info.get('lrc') or {}).get('lyric')
        translated = (lyrics_info.get('tlyric') or {}).get('lyric')

        # Merging needs both texts; a translation without an original
        # cannot be aligned to anything
        if not translated or not original:
            return original

        lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
        original_ts_texts = re.findall(lyrics_expr, original)
        # findall yields (timestamp, text) pairs, which dict() accepts directly
        translation_ts_dict = dict(re.findall(lyrics_expr, translated))
        return '\n'.join(
            '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
            for time_stamp, text in original_ts_texts)

    def _real_extract(self, url):
        """Extract the song's metadata, formats and lyrics into an info dict."""
        song_id = self._match_id(url)

        params = {
            'id': song_id,
            'ids': '[%s]' % song_id,
        }
        info = self.query_api(
            'song/detail?' + compat_urllib_parse_urlencode(params),
            song_id, 'Downloading song info')['songs'][0]

        formats = self.extract_formats(info)
        self._sort_formats(formats)

        lyrics_info = self.query_api(
            'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
            song_id, 'Downloading lyrics data')
        lyrics = self._process_lyrics(lyrics_info)

        alt_title = None
        if info.get('transNames'):
            alt_title = '/'.join(info.get('transNames'))

        # `or` fallbacks tolerate keys that are present but null
        album = info.get('album') or {}
        artists = info.get('artists') or []
        publish_time = album.get('publishTime')

        return {
            'id': song_id,
            'title': info['name'],
            'alt_title': alt_title,
            'creator': ' / '.join([artist['name'] for artist in artists]),
            # A missing publish time yields no timestamp instead of aborting
            'timestamp': None if publish_time is None else self.convert_milliseconds(publish_time),
            'thumbnail': album.get('picUrl'),
            'duration': self.convert_milliseconds(info.get('duration', 0)),
            'description': lyrics,
            'formats': formats,
        }
261
262
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
    """Extractor turning an album page into a playlist of song URLs."""

    IE_NAME = 'netease:album'
    IE_DESC = '网易云音乐 - 专辑'
    _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/album?id=220780',
        'info_dict': {
            'id': '220780',
            'title': 'B\'day',
        },
        'playlist_count': 23,
        'skip': 'Blocked outside Mainland China',
    }

    def _real_extract(self, url):
        """Fetch the album data and return a playlist with one entry per song."""
        album_id = self._match_id(url)
        endpoint = 'album/%s?id=%s' % (album_id, album_id)
        album = self.query_api(endpoint, album_id, 'Downloading album data')['album']

        # The name is required; the description is optional
        title = album['name']
        description = album.get('description')

        entries = []
        for track in album['songs']:
            track_url = 'http://music.163.com/#/song?id=%s' % track['id']
            entries.append(self.url_result(track_url, 'NetEaseMusic', track['id']))

        return self.playlist_result(entries, album_id, title, description)
292
293
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
    """Extractor turning an artist page into a playlist of the artist's hot songs."""

    IE_NAME = 'netease:singer'
    IE_DESC = '网易云音乐 - 歌手'
    _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'note': 'Singer has aliases.',
        'url': 'http://music.163.com/#/artist?id=10559',
        'info_dict': {
            'id': '10559',
            'title': '张惠妹 - aMEI;阿密特',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Singer has translated name.',
        'url': 'http://music.163.com/#/artist?id=124098',
        'info_dict': {
            'id': '124098',
            'title': '李昇基 - 이승기',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Fetch the artist data and return a playlist of its ``hotSongs``.

        The playlist title is the artist name, followed by the translated
        name and the ``;``-joined aliases when those are non-empty.
        """
        singer_id = self._match_id(url)
        endpoint = 'artist/%s?id=%s' % (singer_id, singer_id)
        info = self.query_api(endpoint, singer_id, 'Downloading singer data')

        artist = info['artist']
        title = artist['name']
        translated_name = artist['trans']
        if translated_name:
            title = '%s - %s' % (title, translated_name)
        aliases = artist['alias']
        if aliases:
            title = '%s - %s' % (title, ';'.join(aliases))

        entries = []
        for track in info['hotSongs']:
            track_url = 'http://music.163.com/#/song?id=%s' % track['id']
            entries.append(self.url_result(track_url, 'NetEaseMusic', track['id']))

        return self.playlist_result(entries, singer_id, title)
337
338
class NetEaseMusicListIE(NetEaseMusicBaseIE):
    """Extractor for user playlists and toplists (charts)."""

    IE_NAME = 'netease:playlist'
    IE_DESC = '网易云音乐 - 歌单'
    _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/playlist?id=79177352',
        'info_dict': {
            'id': '79177352',
            'title': 'Billboard 2007 Top 100',
            'description': 'md5:12fd0819cab2965b9583ace0f8b7b022',
        },
        'playlist_count': 99,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Toplist/Charts sample',
        'url': 'http://music.163.com/#/discover/toplist?id=3733003',
        'info_dict': {
            'id': '3733003',
            'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
            'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Fetch the playlist data and return a playlist of its tracks.

        For charts/toplists (``specialType == 10``) the date of the last
        update is appended to the title as ``YYYY-MM-DD``.
        """
        list_id = self._match_id(url)
        endpoint = 'playlist/detail?id=%s&lv=-1&tv=-1' % list_id
        playlist = self.query_api(endpoint, list_id, 'Downloading playlist data')['result']

        title = playlist['name']
        description = playlist.get('description')

        is_chart = playlist.get('specialType') == 10
        if is_chart:
            updated_at = datetime.fromtimestamp(
                self.convert_milliseconds(playlist['updateTime']))
            title = '%s %s' % (title, updated_at.strftime('%Y-%m-%d'))

        entries = []
        for track in playlist['tracks']:
            track_url = 'http://music.163.com/#/song?id=%s' % track['id']
            entries.append(self.url_result(track_url, 'NetEaseMusic', track['id']))

        return self.playlist_result(entries, list_id, title, description)
385
386
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
    """Extractor for a single music video (MV) page."""

    IE_NAME = 'netease:mv'
    IE_DESC = '网易云音乐 - MV'
    _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/mv?id=415350',
        'info_dict': {
            'id': '415350',
            'ext': 'mp4',
            'title': '이럴거면 그러지말지',
            'description': '白雅言自作曲唱甜蜜爱情',
            'creator': '白雅言',
            'upload_date': '20150520',
        },
        'skip': 'Blocked outside Mainland China',
    }

    def _real_extract(self, url):
        """Fetch the MV details and return its info dict.

        Each ``brs`` entry of the response maps a resolution to a URL and
        becomes one mp4 format whose height is that resolution.
        """
        mv_id = self._match_id(url)
        endpoint = 'mv/detail?id=%s&type=mp4' % mv_id
        info = self.query_api(endpoint, mv_id, 'Downloading mv info')['data']

        formats = []
        for resolution, stream_url in info['brs'].items():
            formats.append({
                'url': stream_url,
                'ext': 'mp4',
                'format_id': '%sp' % resolution,
                'height': int(resolution),
            })
        self._sort_formats(formats)

        return {
            'id': mv_id,
            'title': info['name'],
            'description': info.get('desc') or info.get('briefDesc'),
            'creator': info['artistName'],
            # publishTime is used with its dashes stripped
            'upload_date': info['publishTime'].replace('-', ''),
            'formats': formats,
            'thumbnail': info.get('cover'),
            'duration': self.convert_milliseconds(info.get('duration', 0)),
        }
427
428
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
    """Extractor for a radio program, optionally with its accompanying songs."""

    IE_NAME = 'netease:program'
    IE_DESC = '网易云音乐 - 电台节目'
    # `(#/)?` as in the sibling extractors: the `#/` fragment prefix is
    # optional as a whole, so plain `/program?id=` URLs match as well
    _VALID_URL = r'https?://music\.163\.com/(#/)?program\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/program?id=10109055',
        'info_dict': {
            'id': '10109055',
            'ext': 'mp3',
            'title': '不丹足球背后的故事',
            'description': '喜马拉雅人的足球梦 ...',
            'creator': '大话西藏',
            'timestamp': 1434179342,
            'upload_date': '20150613',
            'duration': 900,
        },
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
        },
        'playlist_count': 4,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'ext': 'mp3',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
            'timestamp': 1434450841,
            'upload_date': '20150616',
        },
        'params': {
            'noplaylist': True,
        },
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Return the program's main song, or a playlist of all of its songs.

        _yes_playlist() decides which; it is given the program id only when
        the program has accompanying songs. The playlist lists the main song
        first, followed by the accompanying songs.
        """
        program_id = self._match_id(url)

        info = self.query_api(
            'dj/program/detail?id=%s' % program_id,
            program_id, 'Downloading program info')['program']

        name = info['name']
        description = info['description']

        if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
            formats = self.extract_formats(info['mainSong'])
            self._sort_formats(formats)

            return {
                # Info dict ids are strings; the API value is stringified
                # in case it arrives as a number
                'id': str(info['mainSong']['id']),
                'title': name,
                'description': description,
                'creator': info['dj']['brand'],
                'timestamp': self.convert_milliseconds(info['createTime']),
                'thumbnail': info['coverUrl'],
                'duration': self.convert_milliseconds(info.get('duration', 0)),
                'formats': formats,
            }

        song_ids = [info['mainSong']['id']]
        song_ids.extend(song['id'] for song in info['songs'])
        entries = [
            self.url_result('http://music.163.com/#/song?id=%s' % song_id,
                            'NetEaseMusic', song_id)
            for song_id in song_ids
        ]
        return self.playlist_result(entries, program_id, name, description)
506
507
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
    """Extractor turning a DJ radio page into a playlist of its programs."""

    IE_NAME = 'netease:djradio'
    IE_DESC = '网易云音乐 - 电台'
    _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/djradio?id=42',
        'info_dict': {
            'id': '42',
            'title': '声音蔓延',
            'description': 'md5:766220985cbd16fdd552f64c578a6b15',
        },
        'playlist_mincount': 40,
        'skip': 'Blocked outside Mainland China',
    }
    # Number of programs requested per API call
    _PAGE_SIZE = 1000

    def _real_extract(self, url):
        """Page through the radio's programs and return them as a playlist.

        The playlist title and description are taken from the ``radio``
        entry of the first program seen; both stay None when the radio has
        no programs at all.
        """
        dj_id = self._match_id(url)

        name = None
        desc = None
        entries = []
        for offset in itertools.count(start=0, step=self._PAGE_SIZE):
            info = self.query_api(
                'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
                % (self._PAGE_SIZE, dj_id, offset),
                dj_id, 'Downloading dj programs - %d' % offset)

            # Tolerate a missing or null program list
            programs = info.get('programs') or []
            entries.extend(
                self.url_result(
                    'http://music.163.com/#/program?id=%s' % program['id'],
                    'NetEaseMusicProgram', program['id'])
                for program in programs)

            if name is None and programs:
                radio = programs[0]['radio']
                name = radio['name']
                desc = radio['desc']

            # An empty page means there is nothing left to fetch, even if
            # the response still claims there is more
            if not programs or not info.get('more'):
                break

        return self.playlist_result(entries, dj_id, name, desc)