jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	import re
	2
	3	from .common import InfoExtractor
	4	from ..compat import compat_HTTPError
	5	from ..utils import (
	6	dict_get,
	7	ExtractorError,
	8	int_or_none,
	9	js_to_json,
	10	parse_iso8601,
	11	)
	12
	13
	14	class ZypeIE(InfoExtractor):
	15	_ID_RE = r'[\da-fA-F]+'
	16	_COMMON_RE = r'//player\.zype\.com/embed/%s\.(?:js\|json\|html)\?.*?(?:access_token\|(?:ap[ip]\|player)_key)='
	17	_VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P<id>%s)' % _ID_RE))
	18	_EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?{_COMMON_RE % _ID_RE}.+?)\1']
	19	_TEST = {
	20	'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
	21	'md5': 'eaee31d474c76a955bdaba02a505c595',
	22	'info_dict': {
	23	'id': '5b400b834b32992a310622b9',
	24	'ext': 'mp4',
	25	'title': 'Smoky Barbecue Favorites',
	26	'thumbnail': r're:^https?://.*\.jpe?g',
	27	'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
	28	'timestamp': 1504915200,
	29	'upload_date': '20170909',
	30	},
	31	}
	32
	33	def _real_extract(self, url):
	34	video_id = self._match_id(url)
	35
	36	try:
	37	response = self._download_json(re.sub(
	38	r'\.(?:js\|html)\?', '.json?', url), video_id)['response']
	39	except ExtractorError as e:
	40	if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 403):
	41	raise ExtractorError(self._parse_json(
	42	e.cause.read().decode(), video_id)['message'], expected=True)
	43	raise
	44
	45	body = response['body']
	46	video = response['video']
	47	title = video['title']
	48
	49	subtitles = {}
	50
	51	if isinstance(body, dict):
	52	formats = []
	53	for output in body.get('outputs', []):
	54	output_url = output.get('url')
	55	if not output_url:
	56	continue
	57	name = output.get('name')
	58	if name == 'm3u8':
	59	formats, subtitles = self._extract_m3u8_formats_and_subtitles(
	60	output_url, video_id, 'mp4',
	61	'm3u8_native', m3u8_id='hls', fatal=False)
	62	else:
	63	f = {
	64	'format_id': name,
	65	'tbr': int_or_none(output.get('bitrate')),
	66	'url': output_url,
	67	}
	68	if name in ('m4a', 'mp3'):
	69	f['vcodec'] = 'none'
	70	else:
	71	f.update({
	72	'height': int_or_none(output.get('height')),
	73	'width': int_or_none(output.get('width')),
	74	})
	75	formats.append(f)
	76	text_tracks = body.get('subtitles') or []
	77	else:
	78	m3u8_url = self._search_regex(
	79	r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
	80	body, 'm3u8 url', group='url', default=None)
	81	if not m3u8_url:
	82	source = self._search_regex(
	83	r'(?s)sources\s:\s\[\s({.+?})\s\]', body, 'source')
	84
	85	def get_attr(key):
	86	return self._search_regex(
	87	r'\b%s\s:\s([\'"])(?P<val>(?:(?!\1).)+)\1' % key,
	88	source, key, group='val')
	89
	90	if get_attr('integration') == 'verizon-media':
	91	m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id')
	92	formats, subtitles = self._extract_m3u8_formats_and_subtitles(
	93	m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
	94	text_tracks = self._search_regex(
	95	r'textTracks\s:\s(\[[^]]+\])',
	96	body, 'text tracks', default=None)
	97	if text_tracks:
	98	text_tracks = self._parse_json(
	99	text_tracks, video_id, js_to_json, False)
	100	self._sort_formats(formats)
	101
	102	if text_tracks:
	103	for text_track in text_tracks:
	104	tt_url = dict_get(text_track, ('file', 'src'))
	105	if not tt_url:
	106	continue
	107	subtitles.setdefault(text_track.get('label') or 'English', []).append({
	108	'url': tt_url,
	109	})
	110
	111	thumbnails = []
	112	for thumbnail in video.get('thumbnails', []):
	113	thumbnail_url = thumbnail.get('url')
	114	if not thumbnail_url:
	115	continue
	116	thumbnails.append({
	117	'url': thumbnail_url,
	118	'width': int_or_none(thumbnail.get('width')),
	119	'height': int_or_none(thumbnail.get('height')),
	120	})
	121
	122	return {
	123	'id': video_id,
	124	'display_id': video.get('friendly_title'),
	125	'title': title,
	126	'thumbnails': thumbnails,
	127	'description': dict_get(video, ('description', 'ott_description', 'short_description')),
	128	'timestamp': parse_iso8601(video.get('published_at')),
	129	'duration': int_or_none(video.get('duration')),
	130	'view_count': int_or_none(video.get('request_count')),
	131	'average_rating': int_or_none(video.get('rating')),
	132	'season_number': int_or_none(video.get('season')),
	133	'episode_number': int_or_none(video.get('episode')),
	134	'formats': formats,
	135	'subtitles': subtitles,
	136	}