jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	import json
	2
	3	from .common import InfoExtractor
	4	from ..utils import (
	5	ExtractorError,
	6	int_or_none,
	7	smuggle_url,
	8	traverse_obj,
	9	try_call,
	10	unsmuggle_url,
	11	)
	12
	13
	14	class LiTVIE(InfoExtractor):
	15	_VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod\|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
	16
	17	_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'
	18
	19	_TESTS = [{
	20	'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
	21	'info_dict': {
	22	'id': 'VOD00041606',
	23	'title': '花千骨',
	24	},
	25	'playlist_count': 51, # 50 episodes + 1 trailer
	26	}, {
	27	'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
	28	'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
	29	'info_dict': {
	30	'id': 'VOD00041610',
	31	'ext': 'mp4',
	32	'title': '花千骨第1集',
	33	'thumbnail': r're:https?://.*\.jpg$',
	34	'description': '《花千骨》陸劇線上看。十六年前，平靜的村莊內，一名女嬰隨異相出生，途徑此地的蜀山掌門清虛道長算出此女命運非同一般，她體內散發的異香易招惹妖魔。一念慈悲下，他在村莊周邊設下結界阻擋妖魔入侵，讓其年滿十六後去蜀山，並賜名花千骨。',
	35	'categories': ['奇幻', '愛情', '中國', '仙俠'],
	36	'episode': 'Episode 1',
	37	'episode_number': 1,
	38	},
	39	'params': {
	40	'noplaylist': True,
	41	},
	42	'skip': 'Georestricted to Taiwan',
	43	}, {
	44	'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&',
	45	'md5': '88322ea132f848d6e3e18b32a832b918',
	46	'info_dict': {
	47	'id': 'VOD00044841',
	48	'ext': 'mp4',
	49	'title': '芈月傳第1集　霸星芈月降世楚國',
	50	'description': '楚威王二年，太史令唐昧夜觀星象，發現霸星即將現世。王后得知霸星的預言後，想盡辦法不讓孩子順利出生，幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主，楚威王對此失望至極。楚王后命人將女嬰丟棄河中，居然奇蹟似的被少司命像攔下，楚威王認為此女非同凡響，為她取名芈月。',
	51	},
	52	'skip': 'No longer exists',
	53	}]
	54
	55	def _extract_playlist(self, playlist_data, content_type):
	56	all_episodes = [
	57	self.url_result(smuggle_url(
	58	self._URL_TEMPLATE % (content_type, episode['contentId']),
	59	{'force_noplaylist': True})) # To prevent infinite recursion
	60	for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))]
	61
	62	return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title'))
	63
	64	def _real_extract(self, url):
	65	url, smuggled_data = unsmuggle_url(url, {})
	66
	67	video_id = self._match_id(url)
	68
	69	webpage = self._download_webpage(url, video_id)
	70
	71	if self._search_regex(
	72	r'(?i)<meta\s[^>]http-equiv="refresh"\s[^>]content="[0-9]+;\s*url=https://www\.litv\.tv/"',
	73	webpage, 'meta refresh redirect', default=False, group=0):
	74	raise ExtractorError('No such content found', expected=True)
	75
	76	program_info = self._parse_json(self._search_regex(
	77	r'var\s+programInfo\s=\s([^;]+)', webpage, 'VOD data', default='{}'),
	78	video_id)
	79
	80	# In browsers `getProgramInfo` request is always issued. Usually this
	81	# endpoint gives the same result as the data embedded in the webpage.
	82	# If, for some reason, there are no embedded data, we do an extra request.
	83	if 'assetId' not in program_info:
	84	program_info = self._download_json(
	85	'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
	86	query={'contentId': video_id},
	87	headers={'Accept': 'application/json'})
	88
	89	series_id = program_info['seriesId']
	90	if self._yes_playlist(series_id, video_id, smuggled_data):
	91	playlist_data = self._download_json(
	92	'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
	93	query={'seriesId': series_id}, headers={'Accept': 'application/json'})
	94	return self._extract_playlist(playlist_data, program_info['contentType'])
	95
	96	video_data = self._parse_json(self._search_regex(
	97	r'uiHlsUrl\s=\stestBackendData$([^;]+)$;',
	98	webpage, 'video data', default='{}'), video_id)
	99	if not video_data:
	100	payload = {'assetId': program_info['assetId']}
	101	puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
	102	if puid:
	103	payload.update({
	104	'type': 'auth',
	105	'puid': puid,
	106	})
	107	endpoint = 'getUrl'
	108	else:
	109	payload.update({
	110	'watchDevices': program_info['watchDevices'],
	111	'contentType': program_info['contentType'],
	112	})
	113	endpoint = 'getMainUrlNoAuth'
	114	video_data = self._download_json(
	115	f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
	116	data=json.dumps(payload).encode('utf-8'),
	117	headers={'Content-Type': 'application/json'})
	118
	119	if not video_data.get('fullpath'):
	120	error_msg = video_data.get('errorMessage')
	121	if error_msg == 'vod.error.outsideregionerror':
	122	self.raise_geo_restricted('This video is available in Taiwan only')
	123	if error_msg:
	124	raise ExtractorError('%s said: %s' % (self.IE_NAME, error_msg), expected=True)
	125	raise ExtractorError('Unexpected result from %s' % self.IE_NAME)
	126
	127	formats = self._extract_m3u8_formats(
	128	video_data['fullpath'], video_id, ext='mp4',
	129	entry_protocol='m3u8_native', m3u8_id='hls')
	130	for a_format in formats:
	131	# LiTV HLS segments doesn't like compressions
	132	a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity'
	133
	134	title = program_info['title'] + program_info.get('secondaryMark', '')
	135	description = program_info.get('description')
	136	thumbnail = program_info.get('imageFile')
	137	categories = [item['name'] for item in program_info.get('category', [])]
	138	episode = int_or_none(program_info.get('episode'))
	139
	140	return {
	141	'id': video_id,
	142	'formats': formats,
	143	'title': title,
	144	'description': description,
	145	'thumbnail': thumbnail,
	146	'categories': categories,
	147	'episode_number': episode,
	148	}