jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import time
	2	import hashlib
	3	import urllib
	4	import uuid
	5
	6	from .common import InfoExtractor
	7	from .openload import PhantomJSwrapper
	8	from ..utils import (
	9	ExtractorError,
	10	UserNotLive,
	11	determine_ext,
	12	int_or_none,
	13	js_to_json,
	14	parse_resolution,
	15	str_or_none,
	16	traverse_obj,
	17	unescapeHTML,
	18	url_or_none,
	19	urlencode_postdata,
	20	urljoin,
	21	)
	22
	23
	24	class DouyuBaseIE(InfoExtractor):
	25	def _download_cryptojs_md5(self, video_id):
	26	for url in [
	27	'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
	28	'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
	29	]:
	30	js_code = self._download_webpage(
	31	url, video_id, note='Downloading signing dependency', fatal=False)
	32	if js_code:
	33	self.cache.store('douyu', 'crypto-js-md5', js_code)
	34	return js_code
	35	raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
	36
	37	def _get_cryptojs_md5(self, video_id):
	38	return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
	39
	40	def _calc_sign(self, sign_func, video_id, a):
	41	b = uuid.uuid4().hex
	42	c = round(time.time())
	43	js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
	44	phantom = PhantomJSwrapper(self)
	45	result = phantom.execute(js_script, video_id,
	46	note='Executing JS signing script').strip()
	47	return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()}
	48
	49	def _search_js_sign_func(self, webpage, fatal=True):
	50	# The greedy look-behind ensures last possible script tag is matched
	51	return self._search_regex(
	52	r'(?:<script.)?<script[^>]>(.?ub98484234.?)</script>', webpage, 'JS sign func', fatal=fatal)
	53
	54
	55	class DouyuTVIE(DouyuBaseIE):
	56	IE_DESC = '斗鱼直播'
	57	_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=\|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
	58	_TESTS = [{
	59	'url': 'https://www.douyu.com/pigff',
	60	'info_dict': {
	61	'id': '24422',
	62	'display_id': 'pigff',
	63	'ext': 'mp4',
	64	'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
	65	'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
	66	'thumbnail': str,
	67	'uploader': 'pigff',
	68	'is_live': True,
	69	'live_status': 'is_live',
	70	},
	71	'params': {
	72	'skip_download': True,
	73	},
	74	}, {
	75	'url': 'http://www.douyutv.com/85982',
	76	'info_dict': {
	77	'id': '85982',
	78	'display_id': '85982',
	79	'ext': 'flv',
	80	'title': 're:^小漠从零单排记！——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
	81	'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
	82	'thumbnail': r're:^https?://.*\.png',
	83	'uploader': 'douyu小漠',
	84	'is_live': True,
	85	},
	86	'params': {
	87	'skip_download': True,
	88	},
	89	'skip': 'Room not found',
	90	}, {
	91	'url': 'http://www.douyutv.com/17732',
	92	'info_dict': {
	93	'id': '17732',
	94	'display_id': '17732',
	95	'ext': 'flv',
	96	'title': 're:^清晨醒脑！根本停不下来！ [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
	97	'description': r're:.m7show@163\.com.',
	98	'thumbnail': r're:^https?://.*\.png',
	99	'uploader': '7师傅',
	100	'is_live': True,
	101	},
	102	'params': {
	103	'skip_download': True,
	104	},
	105	}, {
	106	'url': 'https://www.douyu.com/topic/ydxc?rid=6560603',
	107	'info_dict': {
	108	'id': '6560603',
	109	'display_id': '6560603',
	110	'ext': 'flv',
	111	'title': 're:^阿余：新年快乐恭喜发财！ [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
	112	'description': 're:.直播时间.',
	113	'thumbnail': r're:^https?://.*\.png',
	114	'uploader': '阿涛皎月Carry',
	115	'live_status': 'is_live',
	116	},
	117	'params': {
	118	'skip_download': True,
	119	},
	120	}, {
	121	'url': 'http://www.douyu.com/xiaocang',
	122	'only_matching': True,
	123	}, {
	124	# \"room_id\"
	125	'url': 'http://www.douyu.com/t/lpl',
	126	'only_matching': True,
	127	}]
	128
	129	def _get_sign_func(self, room_id, video_id):
	130	return self._download_json(
	131	f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
	132	note='Getting signing script')['data'][f'room{room_id}']
	133
	134	def _extract_stream_formats(self, stream_formats):
	135	formats = []
	136	for stream_info in traverse_obj(stream_formats, (..., 'data')):
	137	stream_url = urljoin(
	138	traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
	139	if stream_url:
	140	rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
	141	rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
	142	ext = determine_ext(stream_url)
	143	formats.append({
	144	'url': stream_url,
	145	'format_id': str_or_none(rate_id),
	146	'ext': 'mp4' if ext == 'm3u8' else ext,
	147	'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
	148	'quality': rate_id % -10000 if rate_id is not None else None,
	149	**traverse_obj(rate_info, {
	150	'format': ('name', {str_or_none}),
	151	'tbr': ('bit', {int_or_none}),
	152	}),
	153	})
	154	return formats
	155
	156	def _real_extract(self, url):
	157	video_id = self._match_id(url)
	158
	159	webpage = self._download_webpage(url, video_id)
	160	room_id = self._search_regex(r'\$ROOM\.room_id\s=\s(\d+)', webpage, 'room id')
	161
	162	if self._search_regex(r'"videoLoop"\s:\s(\d+)', webpage, 'loop', default='') == '1':
	163	raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
	164	if self._search_regex(r'\$ROOM\.show_status\s=\s(\d+)', webpage, 'status', default='') == '2':
	165	raise UserNotLive(video_id=video_id)
	166
	167	# Grab metadata from API
	168	params = {
	169	'aid': 'wp',
	170	'client_sys': 'wp',
	171	'time': int(time.time()),
	172	}
	173	params['auth'] = hashlib.md5(
	174	f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
	175	room = traverse_obj(self._download_json(
	176	f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
	177	note='Downloading room info', query=params, fatal=False), 'data')
	178
	179	# 1 = live, 2 = offline
	180	if traverse_obj(room, 'show_status') == '2':
	181	raise UserNotLive(video_id=video_id)
	182
	183	js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
	184	form_data = {
	185	'rate': 0,
	186	**self._calc_sign(js_sign_func, video_id, room_id),
	187	}
	188	stream_formats = [self._download_json(
	189	f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
	190	video_id, note="Downloading livestream format",
	191	data=urlencode_postdata(form_data))]
	192
	193	for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
	194	if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
	195	form_data['rate'] = rate_id
	196	stream_formats.append(self._download_json(
	197	f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
	198	video_id, note=f'Downloading livestream format {rate_id}',
	199	data=urlencode_postdata(form_data)))
	200
	201	return {
	202	'id': room_id,
	203	'formats': self._extract_stream_formats(stream_formats),
	204	'is_live': True,
	205	**traverse_obj(room, {
	206	'display_id': ('url', {str}, {lambda i: i[1:]}),
	207	'title': ('room_name', {unescapeHTML}),
	208	'description': ('show_details', {str}),
	209	'uploader': ('nickname', {str}),
	210	'thumbnail': ('room_src', {url_or_none}),
	211	})
	212	}
	213
	214
	215	class DouyuShowIE(DouyuBaseIE):
	216	_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
	217
	218	_TESTS = [{
	219	'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
	220	'info_dict': {
	221	'id': 'mPyq7oVNe5Yv1gLY',
	222	'ext': 'mp4',
	223	'title': '四川人小时候的味道“蒜苗回锅肉”，传统菜不能丢，要常做来吃',
	224	'duration': 633,
	225	'thumbnail': str,
	226	'uploader': '美食作家王刚V',
	227	'uploader_id': 'OVAO4NVx1m7Q',
	228	'timestamp': 1661850002,
	229	'upload_date': '20220830',
	230	'view_count': int,
	231	'tags': ['美食', '美食综合'],
	232	},
	233	}, {
	234	'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
	235	'only_matching': True,
	236	}]
	237
	238	_FORMATS = {
	239	'super': '原画',
	240	'high': '超清',
	241	'normal': '高清',
	242	}
	243
	244	_QUALITIES = {
	245	'super': -1,
	246	'high': -2,
	247	'normal': -3,
	248	}
	249
	250	_RESOLUTIONS = {
	251	'super': '1920x1080',
	252	'high': '1280x720',
	253	'normal': '852x480',
	254	}
	255
	256	def _real_extract(self, url):
	257	url = url.replace('vmobile.', 'v.')
	258	video_id = self._match_id(url)
	259
	260	webpage = self._download_webpage(url, video_id)
	261
	262	video_info = self._search_json(
	263	r'<script>\swindow\.\$DATA\s=', webpage,
	264	'video info', video_id, transform_source=js_to_json)
	265
	266	js_sign_func = self._search_js_sign_func(webpage)
	267	form_data = {
	268	'vid': video_id,
	269	**self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
	270	}
	271	url_info = self._download_json(
	272	'https://v.douyu.com/api/stream/getStreamUrl', video_id,
	273	data=urlencode_postdata(form_data), note="Downloading video formats")
	274
	275	formats = []
	276	for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
	277	video_url = traverse_obj(url, ('url', {url_or_none}))
	278	if video_url:
	279	ext = determine_ext(video_url)
	280	formats.append({
	281	'format': self._FORMATS.get(name),
	282	'format_id': name,
	283	'url': video_url,
	284	'quality': self._QUALITIES.get(name),
	285	'ext': 'mp4' if ext == 'm3u8' else ext,
	286	'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
	287	**parse_resolution(self._RESOLUTIONS.get(name))
	288	})
	289	else:
	290	self.to_screen(
	291	f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
	292
	293	return {
	294	'id': video_id,
	295	'formats': formats,
	296	**traverse_obj(video_info, ('DATA', {
	297	'title': ('content', 'title', {str}),
	298	'uploader': ('content', 'author', {str}),
	299	'uploader_id': ('content', 'up_id', {str_or_none}),
	300	'duration': ('content', 'video_duration', {int_or_none}),
	301	'thumbnail': ('content', 'video_pic', {url_or_none}),
	302	'timestamp': ('content', 'create_time', {int_or_none}),
	303	'view_count': ('content', 'view_num', {int_or_none}),
	304	'tags': ('videoTag', ..., 'tagName', {str}),
	305	}))
	306	}