X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/185bf31070b1e8b6845da5ff8b33321017b22157..ac668111128b5f124b4271b3aa4c35f6e71a4749:/yt_dlp/downloader/fragment.py diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index d4f112b0f..3535e0e7d 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -1,30 +1,23 @@ -from __future__ import division, unicode_literals - +import concurrent.futures +import contextlib import http.client import json import math import os +import struct import time - -try: - import concurrent.futures - can_threaded_download = True -except ImportError: - can_threaded_download = False +import urllib.error from .common import FileDownloader from .http import HttpFD -from ..aes import aes_cbc_decrypt_bytes -from ..compat import ( - compat_os_name, - compat_urllib_error, - compat_struct_pack, -) +from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 +from ..compat import compat_os_name from ..utils import ( DownloadError, - error_to_compat_str, encodeFilename, + error_to_compat_str, sanitized_Request, + traverse_obj, ) @@ -32,9 +25,7 @@ class HttpQuietDownloader(HttpFD): def to_screen(self, *args, **kargs): pass - def report_retry(self, err, count, retries): - super().to_screen( - f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...') + to_console_title = to_screen class FragmentFD(FileDownloader): @@ -77,6 +68,7 @@ def report_retry_fragment(self, err, frag_index, count, retries): self.to_screen( '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...' % (error_to_compat_str(err), frag_index, count, self.format_retries(retries))) + self.sleep_retry('fragment', count) def report_skip_fragment(self, frag_index, err=None): err = f' {err};' if err else '' @@ -130,16 +122,23 @@ def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_dat 'request_data': request_data, 'ctx_id': ctx.get('ctx_id'), } - success = ctx['dl'].download(fragment_filename, fragment_info_dict) + success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict) if not success: - return False, None + return False if fragment_info_dict.get('filetime'): ctx['fragment_filetime'] = fragment_info_dict.get('filetime') ctx['fragment_filename_sanitized'] = fragment_filename - return True, self._read_fragment(ctx) + return True def _read_fragment(self, ctx): - down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') + if not ctx.get('fragment_filename_sanitized'): + return None + try: + down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') + except FileNotFoundError: + if ctx.get('live'): + return None + raise ctx['fragment_filename_sanitized'] = frag_sanitized frag_content = down.read() down.close() @@ -153,7 +152,7 @@ def _append_fragment(self, ctx, frag_content): if self.__do_ytdl_file(ctx): self._write_ytdl_file(ctx) if not self.params.get('keep_fragments', False): - os.remove(encodeFilename(ctx['fragment_filename_sanitized'])) + self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) del ctx['fragment_filename_sanitized'] def _prepare_frag_download(self, ctx): @@ -166,21 +165,13 @@ def _prepare_frag_download(self, ctx): total_frags_str += ' (not including %d ad)' % ad_frags else: total_frags_str = 'unknown (live)' - self.to_screen( - '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) + self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}') self.report_destination(ctx['filename']) - dl = HttpQuietDownloader( - self.ydl, - { - 'continuedl': True, - 'quiet': self.params.get('quiet'), - 'noprogress': True, - 'ratelimit': self.params.get('ratelimit'), - 'retries': self.params.get('retries', 0), - 'nopart': self.params.get('nopart', False), - 'test': self.params.get('test', False), - } - ) + dl = HttpQuietDownloader(self.ydl, { + **self.params, + 'noprogress': True, + 'test': False, + }) tmpfilename = self.temp_name(ctx['filename']) open_mode = 'wb' resume_len = 0 @@ -253,6 +244,9 @@ def frag_progress_hook(s): if s['status'] not in ('downloading', 'finished'): return + if not total_frags and ctx.get('fragment_count'): + state['fragment_count'] = ctx['fragment_count'] + if ctx_id is not None and s.get('ctx_id') != ctx_id: return @@ -299,7 +293,7 @@ def _finish_frag_download(self, ctx, info_dict): if self.__do_ytdl_file(ctx): ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) if os.path.isfile(ytdl_filename): - os.remove(ytdl_filename) + self.try_remove(ytdl_filename) elapsed = time.time() - ctx['started'] if ctx['tmpfilename'] == '-': @@ -309,10 +303,8 @@ def _finish_frag_download(self, ctx, info_dict): if self.params.get('updatetime', True): filetime = ctx.get('fragment_filetime') if filetime: - try: + with contextlib.suppress(Exception): os.utime(ctx['filename'], (time.time(), filetime)) - except Exception: - pass downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) self._hook_progress({ @@ -336,8 +328,7 @@ def _prepare_external_frag_download(self, ctx): total_frags_str += ' (not including %d ad)' % ad_frags else: total_frags_str = 'unknown (live)' - self.to_screen( - '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) + self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}') tmpfilename = self.temp_name(ctx['filename']) @@ -359,15 +350,14 @@ def decrypt_fragment(fragment, frag_content): decrypt_info = fragment.get('decrypt_info') if not decrypt_info or decrypt_info['METHOD'] != 'AES-128': return frag_content - iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence']) + iv = decrypt_info.get('IV') or struct.pack('>8xq', fragment['media_sequence']) decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI']) # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded, # not what it decrypts to. if self.params.get('test', False): return frag_content - decrypted_data = aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv) - return decrypted_data[:-decrypted_data[-1]] + return unpad_pkcs7(aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)) return decrypt_fragment @@ -383,6 +373,7 @@ def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_f max_workers = self.params.get('concurrent_fragment_downloads', 1) if max_progress > 1: self._prepare_multiline_status(max_progress) + is_live = any(traverse_obj(args, (..., 2, 'is_live'), default=[])) def thread_func(idx, ctx, fragments, info_dict, tpe): ctx['max_progress'] = max_progress @@ -396,25 +387,43 @@ class FTPE(concurrent.futures.ThreadPoolExecutor): def __exit__(self, exc_type, exc_val, exc_tb): pass - spins = [] if compat_os_name == 'nt': - self.report_warning('Ctrl+C does not work on Windows when used with parallel threads. ' - 'This is a known issue and patches are welcome') + def future_result(future): + while True: + try: + return future.result(0.1) + except KeyboardInterrupt: + raise + except concurrent.futures.TimeoutError: + continue + else: + def future_result(future): + return future.result() + + def interrupt_trigger_iter(fg): + for f in fg: + if not interrupt_trigger[0]: + break + yield f + + spins = [] for idx, (ctx, fragments, info_dict) in enumerate(args): tpe = FTPE(math.ceil(max_workers / max_progress)) - job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe) + job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(fragments), info_dict, tpe) spins.append((tpe, job)) result = True for tpe, job in spins: try: - result = result and job.result() + result = result and future_result(job) except KeyboardInterrupt: interrupt_trigger[0] = False finally: tpe.shutdown(wait=True) - if not interrupt_trigger[0]: + if not interrupt_trigger[0] and not is_live: raise KeyboardInterrupt() + # we expect the user wants to stop and DO WANT the preceding postprocessors to run; + # so returning a intermediate result here instead of KeyboardInterrupt on live return result def download_and_append_fragments( @@ -432,25 +441,25 @@ def download_and_append_fragments( pack_func = lambda frag_content, _: frag_content def download_fragment(fragment, ctx): + if not interrupt_trigger[0]: + return + frag_index = ctx['fragment_index'] = fragment['frag_index'] ctx['last_error'] = None - if not interrupt_trigger[0]: - return False, frag_index headers = info_dict.get('http_headers', {}).copy() byte_range = fragment.get('byte_range') if byte_range: headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) # Never skip the first fragment - fatal = is_fatal(fragment.get('index') or (frag_index - 1)) - count, frag_content = 0, None + fatal, count = is_fatal(fragment.get('index') or (frag_index - 1)), 0 while count <= fragment_retries: try: - success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers) - if not success: - return False, frag_index - break - except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err: + ctx['fragment_count'] = fragment.get('fragment_count') + if self._download_fragment(ctx, fragment['url'], info_dict, headers): + break + return + except (urllib.error.HTTPError, http.client.IncompleteRead) as err: # Unavailable (possibly temporary) fragments may be served. # First we try to retry then either skip or abort. # See https://github.com/ytdl-org/youtube-dl/issues/10165, @@ -466,54 +475,59 @@ def download_fragment(fragment, ctx): break raise - if count > fragment_retries: - if not fatal: - return False, frag_index + if count > fragment_retries and fatal: ctx['dest_stream'].close() self.report_error('Giving up after %s fragment retries' % fragment_retries) - return False, frag_index - return frag_content, frag_index def append_fragment(frag_content, frag_index, ctx): - if not frag_content: - if not is_fatal(frag_index - 1): - self.report_skip_fragment(frag_index, 'fragment not found') - return True - else: - ctx['dest_stream'].close() - self.report_error( - 'fragment %s not found, unable to continue' % frag_index) - return False - self._append_fragment(ctx, pack_func(frag_content, frag_index)) + if frag_content: + self._append_fragment(ctx, pack_func(frag_content, frag_index)) + elif not is_fatal(frag_index - 1): + self.report_skip_fragment(frag_index, 'fragment not found') + else: + ctx['dest_stream'].close() + self.report_error(f'fragment {frag_index} not found, unable to continue') + return False return True decrypt_fragment = self.decrypter(info_dict) max_workers = math.ceil( self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1)) - if can_threaded_download and max_workers > 1: - + if max_workers > 1: def _download_fragment(fragment): ctx_copy = ctx.copy() - frag_content, frag_index = download_fragment(fragment, ctx_copy) - return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized') + download_fragment(fragment, ctx_copy) + return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: - for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments): - if not interrupt_trigger[0]: - break - ctx['fragment_filename_sanitized'] = frag_filename - ctx['fragment_index'] = frag_index - result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) - if not result: - return False + try: + for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): + ctx.update({ + 'fragment_filename_sanitized': frag_filename, + 'fragment_index': frag_index, + }) + if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx): + return False + except KeyboardInterrupt: + self._finish_multiline_status() + self.report_error( + 'Interrupted by user. Waiting for all threads to shutdown...', is_error=False, tb=False) + pool.shutdown(wait=False) + raise else: for fragment in fragments: if not interrupt_trigger[0]: break - frag_content, frag_index = download_fragment(fragment, ctx) - result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + try: + download_fragment(fragment, ctx) + result = append_fragment( + decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) + except KeyboardInterrupt: + if info_dict.get('is_live'): + break + raise if not result: return False