]>
Commit | Line | Data |
---|---|---|
6800d337 | 1 | from __future__ import unicode_literals |
6800d337 | 2 | |
88728713 | 3 | import errno |
4cf1e5d2 | 4 | try: |
5 | import concurrent.futures | |
6 | can_threaded_download = True | |
7 | except ImportError: | |
8 | can_threaded_download = False | |
9 | ||
5219cb3e | 10 | from ..downloader import _get_real_downloader |
c43fe026 | 11 | from .fragment import FragmentFD |
5219cb3e | 12 | |
e33baba0 | 13 | from ..compat import compat_urllib_error |
e06632e3 S |
14 | from ..utils import ( |
15 | DownloadError, | |
4cf1e5d2 | 16 | sanitize_open, |
e06632e3 S |
17 | urljoin, |
18 | ) | |
453a1617 | 19 | |
6800d337 | 20 | |
class DashSegmentsFD(FragmentFD):
    """
    Download segments in a DASH manifest. External downloaders can take over
    the fragment downloads by supporting the 'dash_frag_urls' protocol
    """

    FD_NAME = 'dashsegments'

    def real_download(self, filename, info_dict):
        """Download all DASH fragments of info_dict into filename.

        Returns True on success, False on a fatal/unrecoverable failure.
        """
        fragment_base_url = info_dict.get('fragment_base_url')
        # In test mode only the first fragment is fetched
        fragments = info_dict['fragments'][:1] if self.params.get(
            'test', False) else info_dict['fragments']

        real_downloader = _get_real_downloader(info_dict, 'dash_frag_urls', self.params, None)

        ctx = {
            'filename': filename,
            'total_frags': len(fragments),
        }

        if real_downloader:
            self._prepare_external_frag_download(ctx)
        else:
            self._prepare_and_start_frag_download(ctx)

        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)

        # Build the list of fragments still to fetch; ctx['fragment_index']
        # (set by the _prepare_* helpers) allows resuming an interrupted
        # download by skipping already-written fragments.
        fragments_to_download = []
        frag_index = 0
        for i, fragment in enumerate(fragments):
            frag_index += 1
            if frag_index <= ctx['fragment_index']:
                continue
            fragment_url = fragment.get('url')
            if not fragment_url:
                assert fragment_base_url
                fragment_url = urljoin(fragment_base_url, fragment['path'])

            fragments_to_download.append({
                'frag_index': frag_index,  # 1-based position, used for resume/reporting
                'index': i,                # 0-based position in the manifest
                'url': fragment_url,
            })

        if real_downloader:
            # Delegate all fragment fetching to the external downloader
            self.to_screen(
                '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
            info_copy = info_dict.copy()
            info_copy['fragments'] = fragments_to_download
            fd = real_downloader(self.ydl, self.params)
            # TODO: Make progress updates work without hooking twice
            # for ph in self._progress_hooks:
            #     fd.add_progress_hook(ph)
            success = fd.real_download(filename, info_copy)
            if not success:
                return False
        else:
            def download_fragment(fragment):
                # Fetch a single fragment, retrying HTTP errors up to
                # fragment_retries times. Returns (content, frag_index) on
                # success and (False, frag_index) on a (possibly skippable)
                # failure.
                i = fragment['index']
                frag_index = fragment['frag_index']
                fragment_url = fragment['url']

                ctx['fragment_index'] = frag_index

                # In DASH, the first segment contains necessary headers to
                # generate a valid MP4 file, so always abort for the first segment
                fatal = i == 0 or not skip_unavailable_fragments
                count = 0
                while count <= fragment_retries:
                    try:
                        success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
                        if not success:
                            return False, frag_index
                        break
                    except compat_urllib_error.HTTPError as err:
                        # YouTube may often return 404 HTTP error for a fragment causing the
                        # whole download to fail. However if the same fragment is immediately
                        # retried with the same request data this usually succeeds (1-2 attempts
                        # is usually enough) thus allowing to download the whole file successfully.
                        # To be future-proof we will retry all fragments that fail with any
                        # HTTP error.
                        count += 1
                        if count <= fragment_retries:
                            self.report_retry_fragment(err, frag_index, count, fragment_retries)
                    except DownloadError:
                        # Don't retry fragment if error occurred during HTTP downloading
                        # itself since it has own retry settings
                        if not fatal:
                            # BUGFIX: return an explicit failure tuple here.
                            # The previous `break` left frag_content unbound
                            # (the exception fires before the tuple assignment
                            # completes), crashing with UnboundLocalError at
                            # the final return below.
                            return False, frag_index
                        raise

                if count > fragment_retries:
                    if not fatal:
                        return False, frag_index
                    ctx['dest_stream'].close()
                    self.report_error('Giving up after %s fragment retries' % fragment_retries)
                    return False, frag_index

                return frag_content, frag_index

            def append_fragment(frag_content, frag_index):
                # Append a downloaded fragment to the destination stream.
                # Missing fragments are skipped when allowed; otherwise the
                # download is aborted. frag_index is 1-based, so index 1 is
                # the header-carrying first fragment and always fatal.
                fatal = frag_index == 1 or not skip_unavailable_fragments
                if frag_content:
                    fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
                    try:
                        file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
                        ctx['fragment_filename_sanitized'] = frag_sanitized
                        file.close()
                        self._append_fragment(ctx, frag_content)
                        return True
                    except EnvironmentError as ose:
                        if ose.errno != errno.ENOENT:
                            raise
                        # FileNotFoundError: the temp fragment file vanished;
                        # fall through to the shared skip/abort handling below
                        # (previously duplicated verbatim in both branches)
                if not fatal:
                    self.report_skip_fragment(frag_index)
                    return True
                ctx['dest_stream'].close()
                self.report_error(
                    'fragment %s not found, unable to continue' % frag_index)
                return False

            max_workers = self.params.get('concurrent_fragment_downloads', 1)
            if can_threaded_download and max_workers > 1:
                self.report_warning('The download speed shown is only of one thread. This is a known issue')
                with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
                    futures = [pool.submit(download_fragment, fragment) for fragment in fragments_to_download]
                    # timeout must be 0 to return instantly
                    done, not_done = concurrent.futures.wait(futures, timeout=0)
                    try:
                        while not_done:
                            # Check every 1 second for KeyboardInterrupt
                            freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
                            done |= freshly_done
                    except KeyboardInterrupt:
                        for future in not_done:
                            future.cancel()
                        # timeout must be none to cancel
                        concurrent.futures.wait(not_done, timeout=None)
                        # BUGFIX: bare raise re-raises the caught interrupt
                        # with its original traceback, instead of creating a
                        # fresh KeyboardInterrupt and losing the context.
                        raise
                    results = [future.result() for future in futures]

                # Fragments must be appended strictly in order, so appending
                # happens single-threaded after all downloads complete.
                for frag_content, frag_index in results:
                    result = append_fragment(frag_content, frag_index)
                    if not result:
                        return False
            else:
                for fragment in fragments_to_download:
                    frag_content, frag_index = download_fragment(fragment)
                    result = append_fragment(frag_content, frag_index)
                    if not result:
                        return False

        self._finish_frag_download(ctx)
        return True