[yt-dlp.git] / yt_dlp / downloader / hls.py

import binascii
import io
import re
import urllib.parse

from . import get_suitable_downloader
from .external import FFmpegFD
from .fragment import FragmentFD
from .. import webvtt
from ..dependencies import Cryptodome_AES
from ..utils import bug_reports_message, parse_m3u8_attributes, update_url_query


class HlsFD(FragmentFD):
    """
    Download segments in a m3u8 manifest. External downloaders can take over
    the fragment downloads by supporting the 'm3u8_frag_urls' protocol and
    re-defining 'supports_manifest' function
    """

    FD_NAME = 'hlsnative'

    @staticmethod
    def can_download(manifest, info_dict, allow_unplayable_formats=False):
        UNSUPPORTED_FEATURES = [
            # r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]

            # Live streams heuristic does not always work (e.g. geo restricted to Germany
            # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
            # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams [3]

            # This heuristic also is not correct since segments may not be appended as well.
            # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
            # no segments will definitely be appended to the end of the playlist.
            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of
            #                                 # event media playlists [4]
            # r'#EXT-X-MAP:',  # media initialization [5]
            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
            # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
        ]
        if not allow_unplayable_formats:
            UNSUPPORTED_FEATURES += [
                r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
            ]

        def check_results():
            yield not info_dict.get('is_live')
            for feature in UNSUPPORTED_FEATURES:
                yield not re.search(feature, manifest)
        return all(check_results())

    def real_download(self, filename, info_dict):
        """
        Download the media playlist at info_dict['url'] and write it to `filename`.

        The manifest is fetched and inspected first. Depending on its contents the
        work is either delegated entirely to ffmpeg, delegated per fragment to an
        external downloader supporting the 'm3u8_frag_urls' protocol, or done
        natively by building a fragment list and passing it to
        download_and_append_fragments().

        @param filename   Output file name ('-' is passed on as to_stdout)
        @param info_dict  Format dict; reads 'url', 'ext' and optionally
                          'extractor_key', 'format_index' and
                          'extra_param_to_segment_url'
        @returns          False if the stream is DRM protected or an initialization
                          fragment appears after media fragments; otherwise the
                          result of whichever downloader did the work
        """
        man_url = info_dict['url']
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        # Relative URIs in the playlist are resolved against the URL actually served
        man_url = urlh.geturl()
        s = urlh.read().decode('utf-8', 'ignore')

        can_download = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats'))
        message = None
        if can_download:
            has_ffmpeg = FFmpegFD.available()
            no_crypto = not Cryptodome_AES and '#EXT-X-KEY:METHOD=AES-128' in s
            if no_crypto and has_ffmpeg:
                can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
            elif no_crypto:
                message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
                           'Decryption will be performed natively, but will be extremely slow')
            elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
                install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
                message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
                           f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
        if not can_download:
            has_drm = re.search('|'.join([
                r'#EXT-X-FAXS-CM:',  # Adobe Flash Access
                r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://',  # Apple FairPlay
            ]), s)
            if has_drm and not self.params.get('allow_unplayable_formats'):
                self.report_error(
                    'This video is DRM protected; Try selecting another format with --format or '
                    'add --check-formats to automatically fallback to the next best format')
                return False
            message = message or 'Unsupported features have been detected'
            fd = FFmpegFD(self.ydl, self.params)
            self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}')
            return fd.real_download(filename, info_dict)
        elif message:
            self.report_warning(message)

        is_webvtt = info_dict['ext'] == 'vtt'
        if is_webvtt:
            real_downloader = None  # Packing the fragments is not currently supported for external downloader
        else:
            real_downloader = get_suitable_downloader(
                info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-'))
        if real_downloader and not real_downloader.supports_manifest(s):
            real_downloader = None
        if real_downloader:
            self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')

        def is_ad_fragment_start(s):
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))

        def is_ad_fragment_end(s):
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))

        def parse_byte_range(spec, fallback_start):
            # `spec` is "<length>[@<offset>]"; without an offset the range starts at fallback_start
            parts = spec.split('@')
            start = int(parts[1]) if len(parts) == 2 else fallback_start
            return {'start': start, 'end': start + int(parts[0])}

        fragments = []

        # First pass: only count media and ad fragments for the download context
        media_frags = 0
        ad_frags = 0
        ad_frag_next = False
        for line in s.splitlines():
            line = line.strip()
            if not line:
                continue
            if line.startswith('#'):
                if is_ad_fragment_start(line):
                    ad_frag_next = True
                elif is_ad_fragment_end(line):
                    ad_frag_next = False
                continue
            if ad_frag_next:
                ad_frags += 1
                continue
            media_frags += 1

        ctx = {
            'filename': filename,
            'total_frags': media_frags,
            'ad_frags': ad_frags,
        }

        if real_downloader:
            self._prepare_external_frag_download(ctx)
        else:
            self._prepare_and_start_frag_download(ctx, info_dict)

        # State kept in ctx across fragments; used by the WebVTT packer below
        extra_state = ctx.setdefault('extra_state', {})

        format_index = info_dict.get('format_index')
        extra_query = None
        extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
        if extra_param_to_segment_url:
            extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
        media_sequence = 0
        decrypt_info = {'METHOD': 'NONE'}
        byte_range = {}
        # End of the most recently declared sub-range (media or init section). A following
        # EXT-X-BYTERANGE without "@offset" continues from here; 0 when none was declared yet
        next_range_start = 0
        discontinuity_count = 0
        frag_index = 0
        ad_frag_next = False
        # Second pass: build the list of fragments to download
        for line in s.splitlines():
            line = line.strip()
            if not line:
                continue

            # `is not None` so that section 0 (before the first discontinuity) can be selected too
            wrong_section = format_index is not None and discontinuity_count != format_index

            if not line.startswith('#'):
                if wrong_section:
                    continue
                if ad_frag_next:
                    continue
                frag_index += 1
                # NOTE(review): ctx['fragment_index'] is presumably set by the _prepare_* call
                # above (fragments already downloaded) - confirm in FragmentFD
                if frag_index <= ctx['fragment_index']:
                    continue
                frag_url = (
                    line
                    if re.match(r'^https?://', line)
                    else urllib.parse.urljoin(man_url, line))
                if extra_query:
                    frag_url = update_url_query(frag_url, extra_query)

                # NOTE(review): byte_range is not reset here, so it also applies to later
                # fragments that have no EXT-X-BYTERANGE tag of their own (unchanged behavior)
                fragments.append({
                    'frag_index': frag_index,
                    'url': frag_url,
                    'decrypt_info': decrypt_info,
                    'byte_range': byte_range,
                    'media_sequence': media_sequence,
                })
                media_sequence += 1

            elif line.startswith('#EXT-X-MAP'):
                if wrong_section:
                    continue
                if frag_index > 0:
                    self.report_error(
                        'Initialization fragment found after media fragments, unable to download')
                    return False
                frag_index += 1
                map_info = parse_m3u8_attributes(line[11:])
                frag_url = (
                    map_info.get('URI')
                    if re.match(r'^https?://', map_info.get('URI'))
                    else urllib.parse.urljoin(man_url, map_info.get('URI')))
                if extra_query:
                    frag_url = update_url_query(frag_url, extra_query)

                # The init section gets its own range so that it does not leak onto media
                # fragments; without "@offset" it starts at the beginning of the resource
                map_byte_range = {}
                if map_info.get('BYTERANGE'):
                    map_byte_range = parse_byte_range(map_info.get('BYTERANGE'), 0)
                    next_range_start = map_byte_range['end']

                fragments.append({
                    'frag_index': frag_index,
                    'url': frag_url,
                    'decrypt_info': decrypt_info,
                    'byte_range': map_byte_range,
                    'media_sequence': media_sequence,
                })
                media_sequence += 1

            elif line.startswith('#EXT-X-KEY'):
                decrypt_url = decrypt_info.get('URI')
                decrypt_info = parse_m3u8_attributes(line[11:])
                if decrypt_info['METHOD'] == 'AES-128':
                    if 'IV' in decrypt_info:
                        # Drop the 2-character prefix and left-pad to 16 bytes of hex
                        decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
                    if not re.match(r'^https?://', decrypt_info['URI']):
                        decrypt_info['URI'] = urllib.parse.urljoin(
                            man_url, decrypt_info['URI'])
                    if extra_query:
                        decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
                    if decrypt_url != decrypt_info['URI']:
                        decrypt_info['KEY'] = None

            elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                media_sequence = int(line[22:])
            elif line.startswith('#EXT-X-BYTERANGE'):
                byte_range = parse_byte_range(line[17:], next_range_start)
                next_range_start = byte_range['end']
            elif is_ad_fragment_start(line):
                ad_frag_next = True
            elif is_ad_fragment_end(line):
                ad_frag_next = False
            elif line.startswith('#EXT-X-DISCONTINUITY'):
                discontinuity_count += 1

        # We only download the first fragment during the test
        if self.params.get('test', False):
            # Slicing keeps an empty list empty instead of producing [None]
            fragments = fragments[:1]

        if real_downloader:
            info_dict['fragments'] = fragments
            fd = real_downloader(self.ydl, self.params)
            # TODO: Make progress updates work without hooking twice
            # for ph in self._progress_hooks:
            #     fd.add_progress_hook(ph)
            return fd.real_download(filename, info_dict)

        if is_webvtt:
            def pack_fragment(frag_content, frag_index):
                # Rewrites one WebVTT fragment: shifts cue times, merges/deduplicates cues
                # through a window kept in extra_state and returns the bytes to append
                output = io.StringIO()
                adjust = 0
                overflow = False
                mpegts_last = None
                for block in webvtt.parse_fragment(frag_content):
                    if isinstance(block, webvtt.CueBlock):
                        extra_state['webvtt_mpegts_last'] = mpegts_last
                        if overflow:
                            extra_state['webvtt_mpegts_adjust'] += 1
                            overflow = False
                        block.start += adjust
                        block.end += adjust

                        dedup_window = extra_state.setdefault('webvtt_dedup_window', [])

                        ready = []

                        i = 0
                        is_new = True
                        while i < len(dedup_window):
                            wcue = dedup_window[i]
                            wblock = webvtt.CueBlock.from_json(wcue)
                            i += 1
                            if wblock.hinges(block):
                                # Extend the windowed cue instead of adding a new one
                                wcue['end'] = block.end
                                is_new = False
                                continue
                            if wblock == block:
                                is_new = False
                                continue
                            if wblock.end > block.start:
                                continue
                            # The windowed cue ended before this one starts: emit and drop it
                            ready.append(wblock)
                            i -= 1
                            del dedup_window[i]

                        if is_new:
                            dedup_window.append(block.as_json)
                        for ready_block in ready:
                            ready_block.write_into(output)

                        # we only emit cues once they fall out of the duplicate window
                        continue
                    elif isinstance(block, webvtt.Magic):
                        # take care of MPEG PES timestamp overflow
                        if block.mpegts is None:
                            block.mpegts = 0
                        extra_state.setdefault('webvtt_mpegts_adjust', 0)
                        block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
                        if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
                            overflow = True
                            block.mpegts += 1 << 33
                        mpegts_last = block.mpegts

                        if frag_index == 1:
                            extra_state['webvtt_mpegts'] = block.mpegts or 0
                            extra_state['webvtt_local'] = block.local or 0
                            # XXX: block.local = block.mpegts = None ?
                        else:
                            if block.mpegts is not None and block.local is not None:
                                adjust = (
                                    (block.mpegts - extra_state.get('webvtt_mpegts', 0))
                                    - (block.local - extra_state.get('webvtt_local', 0))
                                )
                            continue
                    elif isinstance(block, webvtt.HeaderBlock):
                        if frag_index != 1:
                            # XXX: this should probably be silent as well
                            # or verify that all segments contain the same data
                            self.report_warning(bug_reports_message(
                                'Discarding a %s block found in the middle of the stream; '
                                'if the subtitles display incorrectly,'
                                % (type(block).__name__)))
                            continue
                    block.write_into(output)

                return output.getvalue().encode()

            def fin_fragments():
                # Emits the cues still held in the dedup window after the last fragment
                dedup_window = extra_state.get('webvtt_dedup_window')
                if not dedup_window:
                    return b''

                output = io.StringIO()
                for cue in dedup_window:
                    webvtt.CueBlock.from_json(cue).write_into(output)

                return output.getvalue().encode()

            # Return the status like every other path does
            return self.download_and_append_fragments(
                ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
        else:
            return self.download_and_append_fragments(ctx, fragments, info_dict)
Commit	Line	Data
e154c651	1	import binascii
f8271158	2	import io
f8271158	3	import re
14f25df2	4	import urllib.parse
3bc2ddcc	5
c487cf00	6	from . import get_suitable_downloader
0d66bd0e	7	from .external import FFmpegFD
f8271158	8	from .fragment import FragmentFD
4a2f19ab	9	from .. import webvtt
9b8ee23b	10	from ..dependencies import Cryptodome_AES
f8271158	11	from ..utils import bug_reports_message, parse_m3u8_attributes, update_url_query
3bc2ddcc JMF	12
3bc2ddcc JMF	13
12b84ac8	14	class HlsFD(FragmentFD):
0a473f2f	15	"""
0a473f2f	16	Download segments in a m3u8 manifest. External downloaders can take over
52a8a1e1	17	the fragment downloads by supporting the 'm3u8_frag_urls' protocol and
0a473f2f	18	re-defining 'supports_manifest' function
0a473f2f	19	"""
f0b5d6af	20
f9a5affa S	21	FD_NAME = 'hlsnative'
f9a5affa S	22
0d66bd0e	23	@staticmethod
edf65256	24	def can_download(manifest, info_dict, allow_unplayable_formats=False):
63ad4d43	25	UNSUPPORTED_FEATURES = [
f5974637	26	# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
1e236d7e	27
c15c47d1 S	28	# Live streams heuristic does not always work (e.g. geo restricted to Germany
c15c47d1 S	29	# http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
2937590e	30	# r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
1e236d7e S	31
1e236d7e S	32	# This heuristic also is not correct since segments may not be appended as well.
633b444f S	33	# Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
633b444f S	34	# no segments will definitely be appended to the end of the playlist.
1e236d7e	35	# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
51c4d85c	36	# # event media playlists [4]
b1bb77d7	37	# r'#EXT-X-MAP:', # media initialization [5]
0d66bd0e S	38	# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
	39	# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
	40	# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
6104cc29	41	# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
29f7c58a	42	# 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
63ad4d43	43	]
	44	if not allow_unplayable_formats:
	45	UNSUPPORTED_FEATURES += [
	46	r'#EXT-X-KEY:METHOD=(?!NONE\|AES-128)', # encrypted streams [1]
	47	]
0a473f2f	48
	49	def check_results():
	50	yield not info_dict.get('is_live')
0a473f2f	51	for feature in UNSUPPORTED_FEATURES:
	52	yield not re.search(feature, manifest)
	53	return all(check_results())
0d66bd0e	54
f0b5d6af	55	def real_download(self, filename, info_dict):
f9a5affa S	56	man_url = info_dict['url']
f9a5affa S	57	self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
69035555	58
c5a49ff0 S	59	urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
	60	man_url = urlh.geturl()
	61	s = urlh.read().decode('utf-8', 'ignore')
0d66bd0e	62
7687c8ac	63	can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
bbae4377	64	if can_download:
	65	has_ffmpeg = FFmpegFD.available()
	66	no_crypto = not Cryptodome_AES and '#EXT-X-KEY:METHOD=AES-128' in s
	67	if no_crypto and has_ffmpeg:
49e7e9c3	68	can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
bbae4377	69	elif no_crypto:
49e7e9c3	70	message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
7687c8ac	71	'Decryption will be performed natively, but will be extremely slow')
ae61d108	72	elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
bbae4377	73	install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
	74	message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
	75	f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
7687c8ac	76	if not can_download:
6b993ca7	77	has_drm = re.search('\|'.join([
	78	r'#EXT-X-FAXS-CM:', # Adobe Flash Access
	79	r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
	80	]), s)
	81	if has_drm and not self.params.get('allow_unplayable_formats'):
	82	self.report_error(
	83	'This video is DRM protected; Try selecting another format with --format or '
	84	'add --check-formats to automatically fallback to the next best format')
	85	return False
7687c8ac	86	message = message or 'Unsupported features have been detected'
2bfaf89b	87	fd = FFmpegFD(self.ydl, self.params)
7687c8ac	88	self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}')
2bfaf89b	89	return fd.real_download(filename, info_dict)
7687c8ac	90	elif message:
7687c8ac	91	self.report_warning(message)
0d66bd0e	92
5dcd8e1d	93	is_webvtt = info_dict['ext'] == 'vtt'
	94	if is_webvtt:
	95	real_downloader = None # Packing the fragments is not currently supported for external downloader
	96	else:
96fccc10	97	real_downloader = get_suitable_downloader(
a46a815b	98	info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-'))
0a473f2f	99	if real_downloader and not real_downloader.supports_manifest(s):
0a473f2f	100	real_downloader = None
beb4b92a	101	if real_downloader:
86e5f3ed	102	self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
0a473f2f	103
f1ab3b7d	104	def is_ad_fragment_start(s):
3089bc74 S	105	return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
3089bc74 S	106	or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
74c42d9e	107
f1ab3b7d	108	def is_ad_fragment_end(s):
3089bc74 S	109	return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
3089bc74 S	110	or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
f1ab3b7d	111
d7009caa	112	fragments = []
5219cb3e	113
74c42d9e S	114	media_frags = 0
	115	ad_frags = 0
	116	ad_frag_next = False
f0b5d6af PH	117	for line in s.splitlines():
f0b5d6af PH	118	line = line.strip()
74c42d9e S	119	if not line:
	120	continue
	121	if line.startswith('#'):
f1ab3b7d	122	if is_ad_fragment_start(line):
a9ee4f6e	123	ad_frag_next = True
f1ab3b7d RA	124	elif is_ad_fragment_end(line):
f1ab3b7d RA	125	ad_frag_next = False
74c42d9e S	126	continue
74c42d9e S	127	if ad_frag_next:
f1ab3b7d	128	ad_frags += 1
74c42d9e S	129	continue
74c42d9e S	130	media_frags += 1
f0b5d6af	131
f9a5affa	132	ctx = {
f0b5d6af	133	'filename': filename,
74c42d9e S	134	'total_frags': media_frags,
74c42d9e S	135	'ad_frags': ad_frags,
f9a5affa S	136	}
f9a5affa S	137
5219cb3e	138	if real_downloader:
	139	self._prepare_external_frag_download(ctx)
	140	else:
3ba7740d	141	self._prepare_and_start_frag_download(ctx, info_dict)
f9a5affa	142
4a2f19ab F	143	extra_state = ctx.setdefault('extra_state', {})
4a2f19ab F	144
310c2ed2	145	format_index = info_dict.get('format_index')
b8079a40	146	extra_query = None
aaf44a2f	147	extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
b8079a40	148	if extra_param_to_segment_url:
14f25df2	149	extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
e154c651	150	i = 0
	151	media_sequence = 0
	152	decrypt_info = {'METHOD': 'NONE'}
f5974637	153	byte_range = {}
310c2ed2	154	discontinuity_count = 0
75a24854	155	frag_index = 0
74c42d9e	156	ad_frag_next = False
e154c651	157	for line in s.splitlines():
	158	line = line.strip()
	159	if line:
	160	if not line.startswith('#'):
310c2ed2	161	if format_index and discontinuity_count != format_index:
310c2ed2	162	continue
74c42d9e	163	if ad_frag_next:
74c42d9e	164	continue
75a24854	165	frag_index += 1
3e0304fe	166	if frag_index <= ctx['fragment_index']:
75a24854	167	continue
e154c651	168	frag_url = (
	169	line
	170	if re.match(r'^https?://', line)
14f25df2	171	else urllib.parse.urljoin(man_url, line))
b8079a40 RA	172	if extra_query:
b8079a40 RA	173	frag_url = update_url_query(frag_url, extra_query)
5219cb3e	174
4cf1e5d2	175	fragments.append({
	176	'frag_index': frag_index,
	177	'url': frag_url,
	178	'decrypt_info': decrypt_info,
	179	'byte_range': byte_range,
	180	'media_sequence': media_sequence,
	181	})
d9d8b857	182	media_sequence += 1
5219cb3e	183
b1bb77d7	184	elif line.startswith('#EXT-X-MAP'):
310c2ed2	185	if format_index and discontinuity_count != format_index:
310c2ed2	186	continue
b1bb77d7	187	if frag_index > 0:
b1bb77d7	188	self.report_error(
beb4b92a	189	'Initialization fragment found after media fragments, unable to download')
b1bb77d7	190	return False
	191	frag_index += 1
	192	map_info = parse_m3u8_attributes(line[11:])
	193	frag_url = (
	194	map_info.get('URI')
	195	if re.match(r'^https?://', map_info.get('URI'))
14f25df2	196	else urllib.parse.urljoin(man_url, map_info.get('URI')))
b1bb77d7	197	if extra_query:
b1bb77d7	198	frag_url = update_url_query(frag_url, extra_query)
4cf1e5d2	199
e4fa34a1	200	if map_info.get('BYTERANGE'):
	201	splitted_byte_range = map_info.get('BYTERANGE').split('@')
	202	sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
	203	byte_range = {
	204	'start': sub_range_start,
	205	'end': sub_range_start + int(splitted_byte_range[0]),
	206	}
	207
4cf1e5d2	208	fragments.append({
	209	'frag_index': frag_index,
	210	'url': frag_url,
	211	'decrypt_info': decrypt_info,
	212	'byte_range': byte_range,
	213	'media_sequence': media_sequence
	214	})
d9d8b857	215	media_sequence += 1
b1bb77d7	216
b1bb77d7	217	elif line.startswith('#EXT-X-KEY'):
	218	decrypt_url = decrypt_info.get('URI')
	219	decrypt_info = parse_m3u8_attributes(line[11:])
	220	if decrypt_info['METHOD'] == 'AES-128':
	221	if 'IV' in decrypt_info:
	222	decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
	223	if not re.match(r'^https?://', decrypt_info['URI']):
14f25df2	224	decrypt_info['URI'] = urllib.parse.urljoin(
b1bb77d7	225	man_url, decrypt_info['URI'])
	226	if extra_query:
	227	decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
	228	if decrypt_url != decrypt_info['URI']:
	229	decrypt_info['KEY'] = None
b1bb77d7	230
	231	elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
	232	media_sequence = int(line[22:])
	233	elif line.startswith('#EXT-X-BYTERANGE'):
	234	splitted_byte_range = line[17:].split('@')
	235	sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
	236	byte_range = {
	237	'start': sub_range_start,
	238	'end': sub_range_start + int(splitted_byte_range[0]),
	239	}
	240	elif is_ad_fragment_start(line):
	241	ad_frag_next = True
	242	elif is_ad_fragment_end(line):
	243	ad_frag_next = False
310c2ed2	244	elif line.startswith('#EXT-X-DISCONTINUITY'):
310c2ed2	245	discontinuity_count += 1
4cf1e5d2	246	i += 1
b1bb77d7	247
4cf1e5d2	248	# We only download the first fragment during the test
4c7853de	249	if self.params.get('test', False):
4cf1e5d2	250	fragments = [fragments[0] if fragments else None]
f9a5affa	251
5219cb3e	252	if real_downloader:
03b4de72	253	info_dict['fragments'] = fragments
5219cb3e	254	fd = real_downloader(self.ydl, self.params)
	255	# TODO: Make progress updates work without hooking twice
	256	# for ph in self._progress_hooks:
	257	# fd.add_progress_hook(ph)
03b4de72	258	return fd.real_download(filename, info_dict)
333217f4	259
bd4d1ea3	260	if is_webvtt:
	261	def pack_fragment(frag_content, frag_index):
	262	output = io.StringIO()
	263	adjust = 0
7a6742b5 F	264	overflow = False
7a6742b5 F	265	mpegts_last = None
bd4d1ea3	266	for block in webvtt.parse_fragment(frag_content):
bd4d1ea3	267	if isinstance(block, webvtt.CueBlock):
7a6742b5 F	268	extra_state['webvtt_mpegts_last'] = mpegts_last
	269	if overflow:
	270	extra_state['webvtt_mpegts_adjust'] += 1
	271	overflow = False
bd4d1ea3	272	block.start += adjust
	273	block.end += adjust
	274
	275	dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
bd4d1ea3	276
25a3f4f5 F	277	ready = []
25a3f4f5 F	278
bd4d1ea3	279	i = 0
25a3f4f5	280	is_new = True
bd4d1ea3	281	while i < len(dedup_window):
25a3f4f5 F	282	wcue = dedup_window[i]
	283	wblock = webvtt.CueBlock.from_json(wcue)
	284	i += 1
	285	if wblock.hinges(block):
	286	wcue['end'] = block.end
	287	is_new = False
	288	continue
	289	if wblock == block:
	290	is_new = False
	291	continue
	292	if wblock.end > block.start:
4a2f19ab	293	continue
25a3f4f5 F	294	ready.append(wblock)
25a3f4f5 F	295	i -= 1
bd4d1ea3	296	del dedup_window[i]
bd4d1ea3	297
25a3f4f5 F	298	if is_new:
	299	dedup_window.append(block.as_json)
	300	for block in ready:
	301	block.write_into(output)
bd4d1ea3	302
25a3f4f5 F	303	# we only emit cues once they fall out of the duplicate window
25a3f4f5 F	304	continue
bd4d1ea3	305	elif isinstance(block, webvtt.Magic):
	306	# take care of MPEG PES timestamp overflow
	307	if block.mpegts is None:
	308	block.mpegts = 0
	309	extra_state.setdefault('webvtt_mpegts_adjust', 0)
	310	block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
	311	if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
7a6742b5	312	overflow = True
bd4d1ea3	313	block.mpegts += 1 << 33
7a6742b5	314	mpegts_last = block.mpegts
bd4d1ea3	315
	316	if frag_index == 1:
	317	extra_state['webvtt_mpegts'] = block.mpegts or 0
	318	extra_state['webvtt_local'] = block.local or 0
	319	# XXX: block.local = block.mpegts = None ?
	320	else:
	321	if block.mpegts is not None and block.local is not None:
	322	adjust = (
	323	(block.mpegts - extra_state.get('webvtt_mpegts', 0))
	324	- (block.local - extra_state.get('webvtt_local', 0))
	325	)
	326	continue
	327	elif isinstance(block, webvtt.HeaderBlock):
	328	if frag_index != 1:
	329	# XXX: this should probably be silent as well
	330	# or verify that all segments contain the same data
	331	self.report_warning(bug_reports_message(
	332	'Discarding a %s block found in the middle of the stream; '
	333	'if the subtitles display incorrectly,'
	334	% (type(block).__name__)))
	335	continue
	336	block.write_into(output)
	337
0f06bcd7	338	return output.getvalue().encode()
25a3f4f5 F	339
	340	def fin_fragments():
	341	dedup_window = extra_state.get('webvtt_dedup_window')
	342	if not dedup_window:
	343	return b''
	344
	345	output = io.StringIO()
	346	for cue in dedup_window:
	347	webvtt.CueBlock.from_json(cue).write_into(output)
	348
0f06bcd7	349	return output.getvalue().encode()
25a3f4f5 F	350
	351	self.download_and_append_fragments(
	352	ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
bd4d1ea3	353	else:
25a3f4f5	354	return self.download_and_append_fragments(ctx, fragments, info_dict)