jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	import base64
	2	import time
	3	import uuid
	4
	5	from .common import InfoExtractor
	6	from ..compat import (
	7	compat_HTTPError,
	8	compat_str,
	9	)
	10	from ..utils import (
	11	ExtractorError,
	12	int_or_none,
	13	try_get,
	14	url_or_none,
	15	)
	16
	17
	18	class MGTVIE(InfoExtractor):
	19	_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v\|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
	20	IE_DESC = '芒果TV'
	21	IE_NAME = 'MangoTV'
	22
	23	_TESTS = [{
	24	'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
	25	'info_dict': {
	26	'id': '3116640',
	27	'ext': 'mp4',
	28	'title': '我是歌手第四季',
	29	'description': '我是歌手第四季双年巅峰会',
	30	'duration': 7461,
	31	'thumbnail': r're:^https?://.*\.jpg$',
	32	},
	33	}, {
	34	'url': 'https://w.mgtv.com/b/427837/15588271.html',
	35	'info_dict': {
	36	'id': '15588271',
	37	'ext': 'mp4',
	38	'title': '春日迟迟再出发沉浸版',
	39	'description': 'md5:a7a05a05b1aa87bd50cae619b19bbca6',
	40	'thumbnail': r're:^https?://.+\.jpg',
	41	'duration': 4026,
	42	},
	43	}, {
	44	'url': 'https://w.mgtv.com/b/333652/7329822.html',
	45	'info_dict': {
	46	'id': '7329822',
	47	'ext': 'mp4',
	48	'title': '拜托，请你爱我',
	49	'description': 'md5:cd81be6499bafe32e4d143abd822bf9c',
	50	'thumbnail': r're:^https?://.+\.jpg',
	51	'duration': 2656,
	52	},
	53	}, {
	54	'url': 'https://w.mgtv.com/b/427837/15591647.html',
	55	'only_matching': True,
	56	}, {
	57	'url': 'https://w.mgtv.com/b/388252/15634192.html?fpa=33318&fpos=4&lastp=ch_home',
	58	'only_matching': True,
	59	}, {
	60	'url': 'http://www.mgtv.com/b/301817/3826653.html',
	61	'only_matching': True,
	62	}, {
	63	'url': 'https://w.mgtv.com/b/301817/3826653.html',
	64	'only_matching': True,
	65	}]
	66
	67	def _real_extract(self, url):
	68	video_id = self._match_id(url)
	69	tk2 = base64.urlsafe_b64encode(
	70	f'did={str(uuid.uuid4())}\|pno=1030\|ver=0.3.0301\|clit={int(time.time())}'.encode())[::-1]
	71	try:
	72	api_data = self._download_json(
	73	'https://pcweb.api.mgtv.com/player/video', video_id, query={
	74	'tk2': tk2,
	75	'video_id': video_id,
	76	'type': 'pch5'
	77	}, headers=self.geo_verification_headers())['data']
	78	except ExtractorError as e:
	79	if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
	80	error = self._parse_json(e.cause.read().decode(), None)
	81	if error.get('code') == 40005:
	82	self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
	83	raise ExtractorError(error['msg'], expected=True)
	84	raise
	85	info = api_data['info']
	86	title = info['title'].strip()
	87	stream_data = self._download_json(
	88	'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
	89	'pm2': api_data['atc']['pm2'],
	90	'tk2': tk2,
	91	'video_id': video_id,
	92	'src': 'intelmgtv',
	93	}, headers=self.geo_verification_headers())['data']
	94	stream_domain = stream_data['stream_domain'][0]
	95
	96	formats = []
	97	for idx, stream in enumerate(stream_data['stream']):
	98	stream_path = stream.get('url')
	99	if not stream_path:
	100	continue
	101	format_data = self._download_json(
	102	stream_domain + stream_path, video_id,
	103	note=f'Download video info for format #{idx}')
	104	format_url = format_data.get('info')
	105	if not format_url:
	106	continue
	107	tbr = int_or_none(stream.get('filebitrate') or self._search_regex(
	108	r'_(\d+)_mp4/', format_url, 'tbr', default=None))
	109	formats.append({
	110	'format_id': compat_str(tbr or idx),
	111	'url': url_or_none(format_url),
	112	'ext': 'mp4',
	113	'tbr': tbr,
	114	'protocol': 'm3u8_native',
	115	'http_headers': {
	116	'Referer': url,
	117	},
	118	'format_note': stream.get('name'),
	119	})
	120	self._sort_formats(formats)
	121
	122	return {
	123	'id': video_id,
	124	'title': title,
	125	'formats': formats,
	126	'description': info.get('desc'),
	127	'duration': int_or_none(info.get('duration')),
	128	'thumbnail': info.get('thumb'),
	129	'subtitles': self.extract_subtitles(video_id, stream_domain),
	130	}
	131
	132	def _get_subtitles(self, video_id, domain):
	133	info = self._download_json(f'https://pcweb.api.mgtv.com/video/title?videoId={video_id}',
	134	video_id, fatal=False) or {}
	135	subtitles = {}
	136	for sub in try_get(info, lambda x: x['data']['title']) or []:
	137	url_sub = sub.get('url')
	138	if not url_sub:
	139	continue
	140	locale = sub.get('captionSimpleName') or 'en'
	141	sub = self._download_json(f'{domain}{url_sub}', video_id, fatal=False,
	142	note=f'Download subtitle for locale {sub.get("name")} ({locale})') or {}
	143	sub_url = url_or_none(sub.get('info'))
	144	if not sub_url:
	145	continue
	146	subtitles.setdefault(locale.lower(), []).append({
	147	'url': sub_url,
	148	'name': sub.get('name'),
	149	'ext': 'srt'
	150	})
	151	return subtitles