jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import random
	5	import time
	6	import re
	7
	8	from .common import InfoExtractor
	9	from ..utils import (
	10	strip_jsonp,
	11	unescapeHTML,
	12	clean_html,
	13	)
	14	from ..compat import compat_urllib_request
	15
	16
	17	class QQMusicIE(InfoExtractor):
	18	IE_NAME = 'qqmusic'
	19	IE_DESC = 'QQ音乐'
	20	_VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
	21	_TESTS = [{
	22	'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
	23	'md5': '9ce1c1c8445f561506d2e3cfb0255705',
	24	'info_dict': {
	25	'id': '004295Et37taLD',
	26	'ext': 'mp3',
	27	'title': '可惜没如果',
	28	'upload_date': '20141227',
	29	'creator': '林俊杰',
	30	'description': 'md5:d327722d0361576fde558f1ac68a7065',
	31	'thumbnail': 're:^https?://.*\.jpg$',
	32	}
	33	}, {
	34	'note': 'There is no mp3-320 version of this song.',
	35	'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV',
	36	'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
	37	'info_dict': {
	38	'id': '004MsGEo3DdNxV',
	39	'ext': 'mp3',
	40	'title': '如果',
	41	'upload_date': '20050626',
	42	'creator': '李季美',
	43	'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
	44	'thumbnail': 're:^https?://.*\.jpg$',
	45	}
	46	}]
	47
	48	_FORMATS = {
	49	'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
	50	'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
	51	'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10}
	52	}
	53
	54	# Reference: m_r_GetRUin() in top_player.js
	55	# http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
	56	@staticmethod
	57	def m_r_get_ruin():
	58	curMs = int(time.time() * 1000) % 1000
	59	return int(round(random.random() * 2147483647) * curMs % 1E10)
	60
	61	def _real_extract(self, url):
	62	mid = self._match_id(url)
	63
	64	detail_info_page = self._download_webpage(
	65	'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
	66	mid, note='Download song detail info',
	67	errnote='Unable to get song detail info', encoding='gbk')
	68
	69	song_name = self._html_search_regex(
	70	r"songname:\s*'([^']+)'", detail_info_page, 'song name')
	71
	72	publish_time = self._html_search_regex(
	73	r'发行时间：(\d{4}-\d{2}-\d{2})', detail_info_page,
	74	'publish time', default=None)
	75	if publish_time:
	76	publish_time = publish_time.replace('-', '')
	77
	78	singer = self._html_search_regex(
	79	r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
	80
	81	lrc_content = self._html_search_regex(
	82	r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
	83	detail_info_page, 'LRC lyrics', default=None)
	84	if lrc_content:
	85	lrc_content = lrc_content.replace('\\n', '\n')
	86
	87	thumbnail_url = None
	88	albummid = self._search_regex(
	89	[r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
	90	detail_info_page, 'album mid', default=None)
	91	if albummid:
	92	thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \
	93	% (albummid[-2:-1], albummid[-1], albummid)
	94
	95	guid = self.m_r_get_ruin()
	96
	97	vkey = self._download_json(
	98	'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
	99	mid, note='Retrieve vkey', errnote='Unable to get vkey',
	100	transform_source=strip_jsonp)['key']
	101
	102	formats = []
	103	for format_id, details in self._FORMATS.items():
	104	formats.append({
	105	'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0'
	106	% (details['prefix'], mid, details['ext'], vkey, guid),
	107	'format': format_id,
	108	'format_id': format_id,
	109	'preference': details['preference'],
	110	'abr': details.get('abr'),
	111	})
	112	self._check_formats(formats, mid)
	113	self._sort_formats(formats)
	114
	115	return {
	116	'id': mid,
	117	'formats': formats,
	118	'title': song_name,
	119	'upload_date': publish_time,
	120	'creator': singer,
	121	'description': lrc_content,
	122	'thumbnail': thumbnail_url,
	123	}
	124
	125
	126	class QQPlaylistBaseIE(InfoExtractor):
	127	@staticmethod
	128	def qq_static_url(category, mid):
	129	return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
	130
	131	@classmethod
	132	def get_entries_from_page(cls, page):
	133	entries = []
	134
	135	for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page):
	136	song_mid = unescapeHTML(item).split('\|')[-5]
	137	entries.append(cls.url_result(
	138	'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
	139	song_mid))
	140
	141	return entries
	142
	143
	144	class QQMusicSingerIE(QQPlaylistBaseIE):
	145	IE_NAME = 'qqmusic:singer'
	146	IE_DESC = 'QQ音乐 - 歌手'
	147	_VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
	148	_TEST = {
	149	'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
	150	'info_dict': {
	151	'id': '001BLpXF2DyJe2',
	152	'title': '林俊杰',
	153	'description': 'md5:2a222d89ba4455a3af19940c0481bb78',
	154	},
	155	'playlist_count': 12,
	156	}
	157
	158	def _real_extract(self, url):
	159	mid = self._match_id(url)
	160
	161	singer_page = self._download_webpage(
	162	self.qq_static_url('singer', mid), mid, 'Download singer page')
	163
	164	entries = self.get_entries_from_page(singer_page)
	165
	166	singer_name = self._html_search_regex(
	167	r"singername\s:\s'([^']+)'", singer_page, 'singer name',
	168	default=None)
	169
	170	singer_id = self._html_search_regex(
	171	r"singerid\s:\s'([0-9]+)'", singer_page, 'singer id',
	172	default=None)
	173
	174	singer_desc = None
	175
	176	if singer_id:
	177	req = compat_urllib_request.Request(
	178	'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
	179	req.add_header(
	180	'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
	181	singer_desc_page = self._download_xml(
	182	req, mid, 'Donwload singer description XML')
	183
	184	singer_desc = singer_desc_page.find('./data/info/desc').text
	185
	186	return self.playlist_result(entries, mid, singer_name, singer_desc)
	187
	188
	189	class QQMusicAlbumIE(QQPlaylistBaseIE):
	190	IE_NAME = 'qqmusic:album'
	191	IE_DESC = 'QQ音乐 - 专辑'
	192	_VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
	193
	194	_TESTS = [{
	195	'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1',
	196	'info_dict': {
	197	'id': '000gXCTb2AhRR1',
	198	'title': '我们都是这样长大的',
	199	'description': 'md5:179c5dce203a5931970d306aa9607ea6',
	200	},
	201	'playlist_count': 4,
	202	}, {
	203	'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3',
	204	'info_dict': {
	205	'id': '002Y5a3b3AlCu3',
	206	'title': '그리고...',
	207	'description': 'md5:a48823755615508a95080e81b51ba729',
	208	},
	209	'playlist_count': 8,
	210	}]
	211
	212	def _real_extract(self, url):
	213	mid = self._match_id(url)
	214
	215	album = self._download_json(
	216	'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid,
	217	mid, 'Download album page')['data']
	218
	219	entries = [
	220	self.url_result(
	221	'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
	222	) for song in album['list']
	223	]
	224	album_name = album.get('name')
	225	album_detail = album.get('desc')
	226	if album_detail is not None:
	227	album_detail = album_detail.strip()
	228
	229	return self.playlist_result(entries, mid, album_name, album_detail)
	230
	231
	232	class QQMusicToplistIE(QQPlaylistBaseIE):
	233	IE_NAME = 'qqmusic:toplist'
	234	IE_DESC = 'QQ音乐 - 排行榜'
	235	_VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top\|global)_[0-9]+)'
	236
	237	_TESTS = [{
	238	'url': 'http://y.qq.com/#type=toplist&p=global_123',
	239	'info_dict': {
	240	'id': 'global_123',
	241	'title': '美国iTunes榜',
	242	},
	243	'playlist_count': 10,
	244	}, {
	245	'url': 'http://y.qq.com/#type=toplist&p=top_3',
	246	'info_dict': {
	247	'id': 'top_3',
	248	'title': 'QQ音乐巅峰榜·欧美',
	249	'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成，集结当下最流行的欧美新歌！:更新时间：每周四22点\|统'
	250	'计周期：一周（上周四至本周三）\|统计对象：三个月内发行的欧美歌曲\|统计数量：100首\|统计算法：根据'
	251	'歌曲在一周内的有效播放次数，由高到低取前100名（同一歌手最多允许5首歌曲同时上榜）\|有效播放次数：'
	252	'登录用户完整播放一首歌曲，记为一次有效播放；同一用户收听同一首歌曲，每天记录为1次有效播放'
	253	},
	254	'playlist_count': 100,
	255	}, {
	256	'url': 'http://y.qq.com/#type=toplist&p=global_106',
	257	'info_dict': {
	258	'id': 'global_106',
	259	'title': '韩国Mnet榜',
	260	},
	261	'playlist_count': 50,
	262	}]
	263
	264	def _real_extract(self, url):
	265	list_id = self._match_id(url)
	266
	267	list_type, num_id = list_id.split("_")
	268
	269	toplist_json = self._download_json(
	270	'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg?type=%s&topid=%s&format=json'
	271	% (list_type, num_id),
	272	list_id, 'Download toplist page')
	273
	274	entries = [
	275	self.url_result(
	276	'http://y.qq.com/#type=song&mid=' + song['data']['songmid'], 'QQMusic', song['data']['songmid']
	277	) for song in toplist_json['songlist']
	278	]
	279
	280	topinfo = toplist_json.get('topinfo', {})
	281	list_name = topinfo.get('ListName')
	282	list_description = topinfo.get('info')
	283	return self.playlist_result(entries, list_id, list_name, list_description)
	284
	285
	286	class QQMusicPlaylistIE(QQPlaylistBaseIE):
	287	IE_NAME = 'qqmusic:playlist'
	288	IE_DESC = 'QQ音乐 - 歌单'
	289	_VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
	290
	291	_TEST = {
	292	'url': 'http://y.qq.com/#type=taoge&id=3462654915',
	293	'info_dict': {
	294	'id': '3462654915',
	295	'title': '韩国5月新歌精选下旬',
	296	'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
	297	},
	298	'playlist_count': 40,
	299	}
	300
	301	def _real_extract(self, url):
	302	list_id = self._match_id(url)
	303
	304	list_json = self._download_json(
	305	'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s'
	306	% list_id, list_id, 'Download list page',
	307	transform_source=strip_jsonp)['cdlist'][0]
	308
	309	entries = [
	310	self.url_result(
	311	'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
	312	) for song in list_json['songlist']
	313	]
	314
	315	list_name = list_json.get('dissname')
	316	list_description = clean_html(unescapeHTML(list_json.get('desc')))
	317	return self.playlist_result(entries, list_id, list_name, list_description)