jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	from __future__ import unicode_literals
	2
	3	import re
	4	import io
	5	import binascii
	6
	7	from ..downloader import _get_real_downloader
	8	from .fragment import FragmentFD, can_decrypt_frag
	9	from .external import FFmpegFD
	10
	11	from ..compat import (
	12	compat_urlparse,
	13	)
	14	from ..utils import (
	15	parse_m3u8_attributes,
	16	update_url_query,
	17	bug_reports_message,
	18	)
	19	from .. import webvtt
	20
	21
	22	class HlsFD(FragmentFD):
	23	"""
	24	Download segments in a m3u8 manifest. External downloaders can take over
	25	the fragment downloads by supporting the 'm3u8_frag_urls' protocol and
	26	re-defining 'supports_manifest' function
	27	"""
	28
	29	FD_NAME = 'hlsnative'
	30
	31	@staticmethod
	32	def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypto=can_decrypt_frag):
	33	UNSUPPORTED_FEATURES = [
	34	# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
	35
	36	# Live streams heuristic does not always work (e.g. geo restricted to Germany
	37	# http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
	38	# r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
	39
	40	# This heuristic also is not correct since segments may not be appended as well.
	41	# Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
	42	# no segments will definitely be appended to the end of the playlist.
	43	# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
	44	# # event media playlists [4]
	45	# r'#EXT-X-MAP:', # media initialization [5]
	46	# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
	47	# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
	48	# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
	49	# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
	50	# 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
	51	]
	52	if not allow_unplayable_formats:
	53	UNSUPPORTED_FEATURES += [
	54	r'#EXT-X-KEY:METHOD=(?!NONE\|AES-128)', # encrypted streams [1]
	55	]
	56
	57	def check_results():
	58	yield not info_dict.get('is_live')
	59	is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
	60	yield with_crypto or not is_aes128_enc
	61	yield not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)
	62	for feature in UNSUPPORTED_FEATURES:
	63	yield not re.search(feature, manifest)
	64	return all(check_results())
	65
	66	def real_download(self, filename, info_dict):
	67	man_url = info_dict['url']
	68	self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
	69
	70	urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
	71	man_url = urlh.geturl()
	72	s = urlh.read().decode('utf-8', 'ignore')
	73
	74	if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')):
	75	if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
	76	self.report_error('pycryptodome not found. Please install')
	77	return False
	78	if self.can_download(s, info_dict, with_crypto=True):
	79	self.report_warning('pycryptodome is needed to download this file natively')
	80	fd = FFmpegFD(self.ydl, self.params)
	81	self.report_warning(
	82	'%s detected unsupported features; extraction will be delegated to %s' % (self.FD_NAME, fd.get_basename()))
	83	# TODO: Make progress updates work without hooking twice
	84	# for ph in self._progress_hooks:
	85	# fd.add_progress_hook(ph)
	86	return fd.real_download(filename, info_dict)
	87
	88	is_webvtt = info_dict['ext'] == 'vtt'
	89	if is_webvtt:
	90	real_downloader = None # Packing the fragments is not currently supported for external downloader
	91	else:
	92	real_downloader = _get_real_downloader(info_dict, 'm3u8_frag_urls', self.params, None)
	93	if real_downloader and not real_downloader.supports_manifest(s):
	94	real_downloader = None
	95	if real_downloader:
	96	self.to_screen(
	97	'[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
	98
	99	def is_ad_fragment_start(s):
	100	return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
	101	or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
	102
	103	def is_ad_fragment_end(s):
	104	return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
	105	or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
	106
	107	fragments = []
	108
	109	media_frags = 0
	110	ad_frags = 0
	111	ad_frag_next = False
	112	for line in s.splitlines():
	113	line = line.strip()
	114	if not line:
	115	continue
	116	if line.startswith('#'):
	117	if is_ad_fragment_start(line):
	118	ad_frag_next = True
	119	elif is_ad_fragment_end(line):
	120	ad_frag_next = False
	121	continue
	122	if ad_frag_next:
	123	ad_frags += 1
	124	continue
	125	media_frags += 1
	126
	127	ctx = {
	128	'filename': filename,
	129	'total_frags': media_frags,
	130	'ad_frags': ad_frags,
	131	}
	132
	133	if real_downloader:
	134	self._prepare_external_frag_download(ctx)
	135	else:
	136	self._prepare_and_start_frag_download(ctx, info_dict)
	137
	138	extra_state = ctx.setdefault('extra_state', {})
	139
	140	format_index = info_dict.get('format_index')
	141	extra_query = None
	142	extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
	143	if extra_param_to_segment_url:
	144	extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
	145	i = 0
	146	media_sequence = 0
	147	decrypt_info = {'METHOD': 'NONE'}
	148	byte_range = {}
	149	discontinuity_count = 0
	150	frag_index = 0
	151	ad_frag_next = False
	152	for line in s.splitlines():
	153	line = line.strip()
	154	if line:
	155	if not line.startswith('#'):
	156	if format_index and discontinuity_count != format_index:
	157	continue
	158	if ad_frag_next:
	159	continue
	160	frag_index += 1
	161	if frag_index <= ctx['fragment_index']:
	162	continue
	163	frag_url = (
	164	line
	165	if re.match(r'^https?://', line)
	166	else compat_urlparse.urljoin(man_url, line))
	167	if extra_query:
	168	frag_url = update_url_query(frag_url, extra_query)
	169
	170	fragments.append({
	171	'frag_index': frag_index,
	172	'url': frag_url,
	173	'decrypt_info': decrypt_info,
	174	'byte_range': byte_range,
	175	'media_sequence': media_sequence,
	176	})
	177
	178	elif line.startswith('#EXT-X-MAP'):
	179	if format_index and discontinuity_count != format_index:
	180	continue
	181	if frag_index > 0:
	182	self.report_error(
	183	'Initialization fragment found after media fragments, unable to download')
	184	return False
	185	frag_index += 1
	186	map_info = parse_m3u8_attributes(line[11:])
	187	frag_url = (
	188	map_info.get('URI')
	189	if re.match(r'^https?://', map_info.get('URI'))
	190	else compat_urlparse.urljoin(man_url, map_info.get('URI')))
	191	if extra_query:
	192	frag_url = update_url_query(frag_url, extra_query)
	193
	194	fragments.append({
	195	'frag_index': frag_index,
	196	'url': frag_url,
	197	'decrypt_info': decrypt_info,
	198	'byte_range': byte_range,
	199	'media_sequence': media_sequence
	200	})
	201
	202	if map_info.get('BYTERANGE'):
	203	splitted_byte_range = map_info.get('BYTERANGE').split('@')
	204	sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
	205	byte_range = {
	206	'start': sub_range_start,
	207	'end': sub_range_start + int(splitted_byte_range[0]),
	208	}
	209
	210	elif line.startswith('#EXT-X-KEY'):
	211	decrypt_url = decrypt_info.get('URI')
	212	decrypt_info = parse_m3u8_attributes(line[11:])
	213	if decrypt_info['METHOD'] == 'AES-128':
	214	if 'IV' in decrypt_info:
	215	decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
	216	if not re.match(r'^https?://', decrypt_info['URI']):
	217	decrypt_info['URI'] = compat_urlparse.urljoin(
	218	man_url, decrypt_info['URI'])
	219	if extra_query:
	220	decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
	221	if decrypt_url != decrypt_info['URI']:
	222	decrypt_info['KEY'] = None
	223
	224	elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
	225	media_sequence = int(line[22:])
	226	elif line.startswith('#EXT-X-BYTERANGE'):
	227	splitted_byte_range = line[17:].split('@')
	228	sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
	229	byte_range = {
	230	'start': sub_range_start,
	231	'end': sub_range_start + int(splitted_byte_range[0]),
	232	}
	233	elif is_ad_fragment_start(line):
	234	ad_frag_next = True
	235	elif is_ad_fragment_end(line):
	236	ad_frag_next = False
	237	elif line.startswith('#EXT-X-DISCONTINUITY'):
	238	discontinuity_count += 1
	239	i += 1
	240	media_sequence += 1
	241
	242	# We only download the first fragment during the test
	243	if self.params.get('test', False):
	244	fragments = [fragments[0] if fragments else None]
	245
	246	if real_downloader:
	247	info_copy = info_dict.copy()
	248	info_copy['fragments'] = fragments
	249	fd = real_downloader(self.ydl, self.params)
	250	# TODO: Make progress updates work without hooking twice
	251	# for ph in self._progress_hooks:
	252	# fd.add_progress_hook(ph)
	253	return fd.real_download(filename, info_copy)
	254
	255	if is_webvtt:
	256	def pack_fragment(frag_content, frag_index):
	257	output = io.StringIO()
	258	adjust = 0
	259	for block in webvtt.parse_fragment(frag_content):
	260	if isinstance(block, webvtt.CueBlock):
	261	block.start += adjust
	262	block.end += adjust
	263
	264	dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
	265	cue = block.as_json
	266
	267	# skip the cue if an identical one appears
	268	# in the window of potential duplicates
	269	# and prune the window of unviable candidates
	270	i = 0
	271	skip = True
	272	while i < len(dedup_window):
	273	window_cue = dedup_window[i]
	274	if window_cue == cue:
	275	break
	276	if window_cue['end'] >= cue['start']:
	277	i += 1
	278	continue
	279	del dedup_window[i]
	280	else:
	281	skip = False
	282
	283	if skip:
	284	continue
	285
	286	# add the cue to the window
	287	dedup_window.append(cue)
	288	elif isinstance(block, webvtt.Magic):
	289	# take care of MPEG PES timestamp overflow
	290	if block.mpegts is None:
	291	block.mpegts = 0
	292	extra_state.setdefault('webvtt_mpegts_adjust', 0)
	293	block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
	294	if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
	295	extra_state['webvtt_mpegts_adjust'] += 1
	296	block.mpegts += 1 << 33
	297	extra_state['webvtt_mpegts_last'] = block.mpegts
	298
	299	if frag_index == 1:
	300	extra_state['webvtt_mpegts'] = block.mpegts or 0
	301	extra_state['webvtt_local'] = block.local or 0
	302	# XXX: block.local = block.mpegts = None ?
	303	else:
	304	if block.mpegts is not None and block.local is not None:
	305	adjust = (
	306	(block.mpegts - extra_state.get('webvtt_mpegts', 0))
	307	- (block.local - extra_state.get('webvtt_local', 0))
	308	)
	309	continue
	310	elif isinstance(block, webvtt.HeaderBlock):
	311	if frag_index != 1:
	312	# XXX: this should probably be silent as well
	313	# or verify that all segments contain the same data
	314	self.report_warning(bug_reports_message(
	315	'Discarding a %s block found in the middle of the stream; '
	316	'if the subtitles display incorrectly,'
	317	% (type(block).__name__)))
	318	continue
	319	block.write_into(output)
	320
	321	return output.getvalue().encode('utf-8')
	322	else:
	323	pack_fragment = None
	324	return self.download_and_append_fragments(ctx, fragments, info_dict, pack_fragment)