]>
Commit | Line | Data |
---|---|---|
6800d337 | 1 | from __future__ import unicode_literals |
6800d337 | 2 | |
4cf1e5d2 | 3 | try: |
4 | import concurrent.futures | |
5 | can_threaded_download = True | |
6 | except ImportError: | |
7 | can_threaded_download = False | |
8 | ||
5219cb3e | 9 | from ..downloader import _get_real_downloader |
c43fe026 | 10 | from .fragment import FragmentFD |
5219cb3e | 11 | |
e33baba0 | 12 | from ..compat import compat_urllib_error |
e06632e3 S |
13 | from ..utils import ( |
14 | DownloadError, | |
4cf1e5d2 | 15 | sanitize_open, |
e06632e3 S |
16 | urljoin, |
17 | ) | |
453a1617 | 18 | |
6800d337 | 19 | |
class DashSegmentsFD(FragmentFD):
    """
    Download segments in a DASH manifest. External downloaders can take over
    the fragment downloads by supporting the 'dash_frag_urls' protocol
    """

    FD_NAME = 'dashsegments'

    def real_download(self, filename, info_dict):
        """Download every fragment listed in info_dict['fragments'] to
        `filename`.

        Either delegates all fragment downloads to an external downloader
        that supports the 'dash_frag_urls' protocol, or fetches fragments
        itself — sequentially, or with a thread pool when
        'concurrent_fragment_downloads' > 1 — and appends them in order.
        Returns True on success, False otherwise.
        """
        fragment_base_url = info_dict.get('fragment_base_url')
        # In test mode, only the first fragment is downloaded.
        fragments = info_dict['fragments'][:1] if self.params.get(
            'test', False) else info_dict['fragments']

        real_downloader = _get_real_downloader(info_dict, 'dash_frag_urls', self.params, None)

        ctx = {
            'filename': filename,
            'total_frags': len(fragments),
        }

        if real_downloader:
            self._prepare_external_frag_download(ctx)
        else:
            # Also populates ctx['fragment_index'] (resume position) and
            # ctx['tmpfilename'] — see FragmentFD.
            self._prepare_and_start_frag_download(ctx)

        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)

        # Build the worklist, skipping fragments already downloaded on resume.
        fragments_to_download = []
        frag_index = 0
        for i, fragment in enumerate(fragments):
            frag_index += 1
            if frag_index <= ctx['fragment_index']:
                continue
            fragment_url = fragment.get('url')
            if not fragment_url:
                # Manifest entries without an absolute URL carry a relative
                # 'path' that is resolved against the manifest-wide base URL.
                assert fragment_base_url
                fragment_url = urljoin(fragment_base_url, fragment['path'])

            fragments_to_download.append({
                'frag_index': frag_index,
                'index': i,
                'url': fragment_url,
            })

        if real_downloader:
            self.to_screen(
                '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
            info_copy = info_dict.copy()
            info_copy['fragments'] = fragments_to_download
            fd = real_downloader(self.ydl, self.params)
            # TODO: Make progress updates work without hooking twice
            # for ph in self._progress_hooks:
            #     fd.add_progress_hook(ph)
            success = fd.real_download(filename, info_copy)
            if not success:
                return False
        else:
            def download_fragment(fragment):
                """Fetch one fragment with retries.

                Returns (content, frag_index) on success, or
                (False/None, frag_index) when the fragment failed or was
                skipped non-fatally.
                """
                i = fragment['index']
                frag_index = fragment['frag_index']
                fragment_url = fragment['url']

                # NOTE(review): in threaded mode this write races between
                # workers, so the persisted resume index may be inexact —
                # confirm against FragmentFD's resume handling.
                ctx['fragment_index'] = frag_index

                # In DASH, the first segment contains necessary headers to
                # generate a valid MP4 file, so always abort for the first segment
                fatal = i == 0 or not skip_unavailable_fragments
                # BUGFIX: frag_content must be pre-initialized. Previously, a
                # non-fatal DownloadError raised before the first successful
                # fetch broke out of the loop with frag_content unbound,
                # crashing with NameError at the final return. None makes
                # append_fragment() take its skip path instead.
                frag_content = None
                count = 0
                while count <= fragment_retries:
                    try:
                        success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
                        if not success:
                            return False, frag_index
                        break
                    except compat_urllib_error.HTTPError as err:
                        # YouTube may often return 404 HTTP error for a fragment causing the
                        # whole download to fail. However if the same fragment is immediately
                        # retried with the same request data this usually succeeds (1-2 attempts
                        # is usually enough) thus allowing to download the whole file successfully.
                        # To be future-proof we will retry all fragments that fail with any
                        # HTTP error.
                        count += 1
                        if count <= fragment_retries:
                            self.report_retry_fragment(err, frag_index, count, fragment_retries)
                    except DownloadError:
                        # Don't retry fragment if error occurred during HTTP downloading
                        # itself since it has own retry settings
                        if not fatal:
                            break
                        raise

                if count > fragment_retries:
                    if not fatal:
                        return False, frag_index
                    self.report_error('Giving up after %s fragment retries' % fragment_retries)
                    return False, frag_index

                return frag_content, frag_index

            def append_fragment(frag_content, frag_index):
                """Append downloaded fragment data to the output file.

                Returns True if appended or skipped, False on a fatal miss.
                """
                if frag_content:
                    fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
                    try:
                        file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
                        ctx['fragment_filename_sanitized'] = frag_sanitized
                        file.close()
                        self._append_fragment(ctx, frag_content)
                        return True
                    except FileNotFoundError:
                        # Fragment file vanished between download and append.
                        if skip_unavailable_fragments:
                            self.report_skip_fragment(frag_index)
                            return True
                        else:
                            self.report_error(
                                'fragment %s not found, unable to continue' % frag_index)
                            return False
                else:
                    # Falsy content: fragment failed non-fatally upstream.
                    if skip_unavailable_fragments:
                        self.report_skip_fragment(frag_index)
                        return True
                    else:
                        self.report_error(
                            'fragment %s not found, unable to continue' % frag_index)
                        return False

            max_workers = self.params.get('concurrent_fragment_downloads', 1)
            if can_threaded_download and max_workers > 1:
                self.report_warning('The download speed shown is only of one thread. This is a known issue')
                with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
                    futures = [pool.submit(download_fragment, fragment) for fragment in fragments_to_download]
                    # timeout must be 0 to return instantly
                    done, not_done = concurrent.futures.wait(futures, timeout=0)
                    try:
                        while not_done:
                            # Check every 1 second for KeyboardInterrupt
                            freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
                            done |= freshly_done
                    except KeyboardInterrupt:
                        for future in not_done:
                            future.cancel()
                        # timeout must be none to cancel
                        concurrent.futures.wait(not_done, timeout=None)
                        raise KeyboardInterrupt
                # Appending is done single-threaded, in manifest order, only
                # after all downloads finished.
                results = [future.result() for future in futures]

                for frag_content, frag_index in results:
                    result = append_fragment(frag_content, frag_index)
                    if not result:
                        return False
            else:
                for fragment in fragments_to_download:
                    frag_content, frag_index = download_fragment(fragment)
                    result = append_fragment(frag_content, frag_index)
                    if not result:
                        return False

        self._finish_frag_download(ctx)
        return True