X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/abbeeebc4c20bc5c690d96c83e91e89effd04b81..dca4f462742d64ab82e943c7560d0d9cbc5a9305:/yt_dlp/YoutubeDL.py diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9cec43680..fd1584a7f 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -72,6 +72,7 @@ GeoRestrictedError, get_domain, HEADRequest, + InAdvancePagedList, int_or_none, iri_to_uri, ISO3166Utils, @@ -91,6 +92,7 @@ PerRequestProxyHandler, platform_name, Popen, + POSTPROCESS_WHEN, PostProcessingError, preferredencoding, prepend_extension, @@ -199,7 +201,12 @@ class YoutubeDL(object): verbose: Print additional info to stdout. quiet: Do not print messages to stdout. no_warnings: Do not print out anything for warnings. - forceprint: A list of templates to force print + forceprint: A dict with keys WHEN mapped to a list of templates to + print to stdout. The allowed keys are video or any of the + items in utils.POSTPROCESS_WHEN. + For compatibility, a single list is also accepted + print_to_file: A dict with keys WHEN (same as forceprint) mapped to + a list of tuples with (template, filename) forceurl: Force printing final URL. (Deprecated) forcetitle: Force printing title. (Deprecated) forceid: Force printing ID. (Deprecated) @@ -320,6 +327,8 @@ class YoutubeDL(object): cookiesfrombrowser: A tuple containing the name of the browser, the profile name/pathfrom where cookies are loaded, and the name of the keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT') + legacyserverconnect: Explicitly allow HTTPS connection to servers that do not + support RFC 5746 secure renegotiation nocheckcertificate: Do not verify SSL certificates prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. @@ -343,8 +352,8 @@ class YoutubeDL(object): postprocessors: A list of dictionaries, each with an entry * key: The name of the postprocessor. See yt_dlp/postprocessor/__init__.py for a list. - * when: When to run the postprocessor. Can be one of - pre_process|before_dl|post_process|after_move. + * when: When to run the postprocessor. Allowed values are + the entries of utils.POSTPROCESS_WHEN Assumed to be 'post_process' if not given post_hooks: Deprecated - Register a custom postprocessor instead A list of functions that get called as the final step @@ -475,6 +484,7 @@ class YoutubeDL(object): extractor_args: A dictionary of arguments to be passed to the extractors. See "EXTRACTOR ARGUMENTS" for details. Eg: {'youtube': {'skip': ['dash', 'hls']}} + mark_watched: Mark videos watched (even with --simulate). Only for YouTube youtube_include_dash_manifest: Deprecated - Use extractor_args instead. If True (default), DASH manifests and related data will be downloaded and processed by extractor. @@ -505,7 +515,7 @@ class YoutubeDL(object): params = None _ies = {} - _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []} + _pps = {k: [] for k in POSTPROCESS_WHEN} _printed_messages = set() _first_webpage_request = True _download_retcode = None @@ -523,7 +533,7 @@ def __init__(self, params=None, auto_init=True): params = {} self._ies = {} self._ies_instances = {} - self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []} + self._pps = {k: [] for k in POSTPROCESS_WHEN} self._printed_messages = set() self._first_webpage_request = True self._post_hooks = [] @@ -531,6 +541,7 @@ def __init__(self, params=None, auto_init=True): self._postprocessor_hooks = [] self._download_retcode = 0 self._num_downloads = 0 + self._num_videos = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._err_file = sys.stderr self.params = params @@ -585,7 +596,14 @@ def check_deprecated(param, option, suggestion): else: self.params['nooverwrites'] = not self.params['overwrites'] - if params.get('bidi_workaround', False): + self.params.setdefault('forceprint', {}) + self.params.setdefault('print_to_file', {}) + + # Compatibility with older syntax + if not isinstance(params['forceprint'], dict): + self.params['forceprint'] = {'video': params['forceprint']} + + if self.params.get('bidi_workaround', False): try: import pty master, slave = pty.openpty() @@ -613,7 +631,7 @@ def check_deprecated(param, option, suggestion): if (sys.platform != 'win32' and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] - and not params.get('restrictfilenames', False)): + and not self.params.get('restrictfilenames', False)): # Unicode filesystem API will throw errors (#1474, #13027) self.report_warning( 'Assuming --restrict-filenames since file system encoding ' @@ -1036,6 +1054,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): if info_dict.get('duration', None) is not None else None) info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads + info_dict['video_autonumber'] = self._num_videos if info_dict.get('resolution') is None: info_dict['resolution'] = self.format_resolution(info_dict, default=None) @@ -1151,7 +1170,7 @@ def create_key(outer_mobj): str_fmt = f'{fmt[:-1]}s' if fmt[-1] == 'l': # list delim = '\n' if '#' in flags else ', ' - value, fmt = delim.join(variadic(value, allowed_types=(str, bytes))), str_fmt + value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt elif fmt[-1] == 'j': # json value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt elif fmt[-1] == 'q': # quoted @@ -1203,10 +1222,17 @@ def _prepare_filename(self, info_dict, tmpl_type='default'): try: outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])) filename = self.evaluate_outtmpl(outtmpl, info_dict, True) + if not filename: + return None - force_ext = OUTTMPL_TYPES.get(tmpl_type) - if filename and force_ext is not None: - filename = replace_extension(filename, force_ext, info_dict.get('ext')) + if tmpl_type in ('default', 'temp'): + final_ext, ext = self.params.get('final_ext'), info_dict.get('ext') + if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'): + filename = replace_extension(filename, ext, final_ext) + else: + force_ext = OUTTMPL_TYPES[tmpl_type] + if force_ext: + filename = replace_extension(filename, force_ext, info_dict.get('ext')) # https://github.com/blackjack4494/youtube-dlc/issues/85 trim_file_name = self.params.get('trim_file_name', False) @@ -1586,6 +1612,19 @@ def _fixup(r): def _ensure_dir_exists(self, path): return make_dir(path, self.report_error) + @staticmethod + def _playlist_infodict(ie_result, **kwargs): + return { + **ie_result, + 'playlist': ie_result.get('title') or ie_result.get('id'), + 'playlist_id': ie_result.get('id'), + 'playlist_title': ie_result.get('title'), + 'playlist_uploader': ie_result.get('uploader'), + 'playlist_uploader_id': ie_result.get('uploader_id'), + 'playlist_index': 0, + **kwargs, + } + def __process_playlist(self, ie_result, download): # We process each entry in the playlist playlist = ie_result.get('title') or ie_result.get('id') @@ -1626,23 +1665,27 @@ def iter_playlistitems(format): playlistitems = orderedSet(iter_playlistitems(playlistitems_str)) ie_entries = ie_result['entries'] - msg = ( - 'Downloading %d videos' if not isinstance(ie_entries, list) - else 'Collected %d videos; downloading %%d of them' % len(ie_entries)) - if isinstance(ie_entries, list): + playlist_count = len(ie_entries) + msg = f'Collected {playlist_count} videos; downloading %d of them' + ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count + def get_entry(i): return ie_entries[i - 1] else: + msg = 'Downloading %d videos' if not isinstance(ie_entries, (PagedList, LazyList)): ie_entries = LazyList(ie_entries) + elif isinstance(ie_entries, InAdvancePagedList): + if ie_entries._pagesize == 1: + playlist_count = ie_entries._pagecount def get_entry(i): return YoutubeDL.__handle_extraction_exceptions( lambda self, i: ie_entries[i - 1] )(self, i) - entries = [] + entries, broken = [], False items = playlistitems if playlistitems is not None else itertools.count(playliststart) for i in items: if i == 0: @@ -1664,6 +1707,7 @@ def get_entry(i): if entry is not None: self._match_entry(entry, incomplete=True, silent=True) except (ExistingVideoReached, RejectedVideoReached): + broken = True break ie_result['entries'] = entries @@ -1674,23 +1718,19 @@ def get_entry(i): if entry is not None] n_entries = len(entries) + if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend): + ie_result['playlist_count'] = n_entries + if not playlistitems and (playliststart != 1 or playlistend): playlistitems = list(range(playliststart, playliststart + n_entries)) ie_result['requested_entries'] = playlistitems _infojson_written = False - if not self.params.get('simulate') and self.params.get('allow_playlist_files', True): - ie_copy = { - 'playlist': playlist, - 'playlist_id': ie_result.get('id'), - 'playlist_title': ie_result.get('title'), - 'playlist_uploader': ie_result.get('uploader'), - 'playlist_uploader_id': ie_result.get('uploader_id'), - 'playlist_index': 0, - 'n_entries': n_entries, - } - ie_copy.update(dict(ie_result)) - + write_playlist_files = self.params.get('allow_playlist_files', True) + if write_playlist_files and self.params.get('list_thumbnails'): + self.list_thumbnails(ie_result) + if write_playlist_files and not self.params.get('simulate'): + ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries) _infojson_written = self._write_info_json( 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson')) if _infojson_written is None: @@ -1723,6 +1763,7 @@ def get_entry(i): extra = { 'n_entries': n_entries, '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries), + 'playlist_count': ie_result.get('playlist_count'), 'playlist_index': playlist_index, 'playlist_autonumber': i, 'playlist': playlist, @@ -1755,7 +1796,9 @@ def get_entry(i): 'updated playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None: return - self.to_screen('[download] Finished downloading playlist: %s' % playlist) + + ie_result = self.run_all_pps('playlist', ie_result) + self.to_screen(f'[download] Finished downloading playlist: {playlist}') return ie_result @__handle_extraction_exceptions @@ -2197,10 +2240,7 @@ def restore_last_token(self): def _calc_headers(self, info_dict): res = std_headers.copy() - - add_headers = info_dict.get('http_headers') - if add_headers: - res.update(add_headers) + res.update(info_dict.get('http_headers') or {}) cookies = self._calc_cookies(info_dict) if cookies: @@ -2260,12 +2300,20 @@ def check_thumbnails(thumbnails): def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' + self._num_videos += 1 if 'id' not in info_dict: - raise ExtractorError('Missing "id" field in extractor result') + raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor']) + elif not info_dict.get('id'): + raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor']) + + info_dict['fulltitle'] = info_dict.get('title') if 'title' not in info_dict: raise ExtractorError('Missing "title" field in extractor result', video_id=info_dict['id'], ie=info_dict['extractor']) + elif not info_dict.get('title'): + self.report_warning('Extractor failed to obtain "title". Creating a generic title instead') + info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}' def report_force_conversion(field, field_not, conversion): self.report_warning( @@ -2313,6 +2361,7 @@ def sanitize_numeric_fields(info): for ts_key, date_key in ( ('timestamp', 'upload_date'), ('release_timestamp', 'release_date'), + ('modified_timestamp', 'modified_date'), ): if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: # Working around out-of-range timestamp values (e.g. negative ones on Windows, @@ -2375,6 +2424,8 @@ def sanitize_numeric_fields(info): if info_dict.get('is_live'): get_from_start = bool(self.params.get('live_from_start')) formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start] + if not get_from_start: + info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M') if not formats: self.raise_no_formats(info_dict) @@ -2536,24 +2587,46 @@ def is_wellformed(f): if not self.params.get('ignore_no_formats_error'): raise ExtractorError('Requested format is not available', expected=True, video_id=info_dict['id'], ie=info_dict['extractor']) - else: - self.report_warning('Requested format is not available') - # Process what we can, even without any available formats. - self.process_info(dict(info_dict)) - elif download: - self.to_screen( - '[info] %s: Downloading %d format(s): %s' % ( - info_dict['id'], len(formats_to_download), - ", ".join([f['format_id'] for f in formats_to_download]))) - for fmt in formats_to_download: - new_info = dict(info_dict) + self.report_warning('Requested format is not available') + # Process what we can, even without any available formats. + formats_to_download = [{}] + + best_format = formats_to_download[-1] + if download: + if best_format: + self.to_screen( + f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): ' + + ', '.join([f['format_id'] for f in formats_to_download])) + max_downloads_reached = False + for i, fmt in enumerate(formats_to_download): + formats_to_download[i] = new_info = dict(info_dict) # Save a reference to the original info_dict so that it can be modified in process_info if needed - new_info['__original_infodict'] = info_dict new_info.update(fmt) - self.process_info(new_info) + new_info['__original_infodict'] = info_dict + try: + self.process_info(new_info) + except MaxDownloadsReached: + max_downloads_reached = True + new_info.pop('__original_infodict') + # Remove copied info + for key, val in tuple(new_info.items()): + if info_dict.get(key) == val: + new_info.pop(key) + if max_downloads_reached: + break + + write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download) + assert write_archive.issubset({True, False, 'ignore'}) + if True in write_archive and False not in write_archive: + self.record_download_archive(info_dict) + + info_dict['requested_downloads'] = formats_to_download + info_dict = self.run_all_pps('after_video', info_dict) + if max_downloads_reached: + raise MaxDownloadsReached() + # We update the info dict with the selected best quality format (backwards compatibility) - if formats_to_download: - info_dict.update(formats_to_download[-1]) + info_dict.update(best_format) return info_dict def process_subtitles(self, video_id, normal_subtitles, automatic_captions): @@ -2624,6 +2697,33 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions): subs[lang] = f return subs + def _forceprint(self, key, info_dict): + if info_dict is None: + return + info_copy = info_dict.copy() + info_copy['formats_table'] = self.render_formats_table(info_dict) + info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict) + info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles')) + info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions')) + + def format_tmpl(tmpl): + mobj = re.match(r'\w+(=?)$', tmpl) + if mobj and mobj.group(1): + return f'{tmpl[:-1]} = %({tmpl[:-1]})r' + elif mobj: + return f'%({tmpl})s' + return tmpl + + for tmpl in self.params['forceprint'].get(key, []): + self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy)) + + for tmpl, file_tmpl in self.params['print_to_file'].get(key, []): + filename = self.evaluate_outtmpl(file_tmpl, info_dict) + tmpl = format_tmpl(tmpl) + self.to_screen(f'[info] Writing {tmpl!r} to: {filename}') + with io.open(filename, 'a', encoding='utf-8') as f: + f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n') + def __forced_printings(self, info_dict, filename, incomplete): def print_mandatory(field, actual_field=None): if actual_field is None: @@ -2646,15 +2746,11 @@ def print_optional(field): elif 'url' in info_dict: info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '') - if self.params.get('forceprint') or self.params.get('forcejson'): + if (self.params.get('forcejson') + or self.params['forceprint'].get('video') + or self.params['print_to_file'].get('video')): self.post_extract(info_dict) - for tmpl in self.params.get('forceprint', []): - mobj = re.match(r'\w+(=?)$', tmpl) - if mobj and mobj.group(1): - tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s' - elif mobj: - tmpl = '%({})s'.format(tmpl) - self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict)) + self._forceprint('video', info_dict) print_mandatory('title') print_mandatory('id') @@ -2692,7 +2788,9 @@ def dl(self, name, info, subtitle=False, test=False): if not test: for ph in self._progress_hooks: fd.add_progress_hook(ph) - urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']]) + urls = '", "'.join( + (f['url'].split(',')[0] + ',' if f['url'].startswith('data:') else f['url']) + for f in info.get('requested_formats', []) or [info]) self.write_debug('Invoking downloader on "%s"' % urls) # Note: Ideally info should be a deep-copied so that hooks cannot modify it. @@ -2702,26 +2800,27 @@ def dl(self, name, info, subtitle=False, test=False): new_info['http_headers'] = self._calc_headers(new_info) return fd.download(name, new_info, subtitle) - def process_info(self, info_dict): - """Process a single resolved IE result.""" - - assert info_dict.get('_type', 'video') == 'video' + def existing_file(self, filepaths, *, default_overwrite=True): + existing_files = list(filter(os.path.exists, orderedSet(filepaths))) + if existing_files and not self.params.get('overwrites', default_overwrite): + return existing_files[0] - max_downloads = self.params.get('max_downloads') - if max_downloads is not None: - if self._num_downloads >= int(max_downloads): - raise MaxDownloadsReached() + for file in existing_files: + self.report_file_delete(file) + os.remove(file) + return None - if info_dict.get('is_live') and not self.params.get('live_from_start'): - info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + def process_info(self, info_dict): + """Process a single resolved IE result. (Modified it in-place)""" - # TODO: backward compatibility, to be removed - info_dict['fulltitle'] = info_dict['title'] + assert info_dict.get('_type', 'video') == 'video' + original_infodict = info_dict if 'format' not in info_dict and 'ext' in info_dict: info_dict['format'] = info_dict['ext'] if self._match_entry(info_dict) is not None: + info_dict['__write_download_archive'] = 'ignore' return self.post_extract(info_dict) @@ -2736,9 +2835,7 @@ def process_info(self, info_dict): self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict)) if self.params.get('simulate'): - if self.params.get('force_write_download_archive', False): - self.record_download_archive(info_dict) - # Do nothing else if in simulate mode + info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') return if full_filename is None: @@ -2833,43 +2930,39 @@ def _write_link_file(link_type): for link_type, should_write in write_links.items()): return + def replace_info_dict(new_info): + nonlocal info_dict + if new_info == info_dict: + return + info_dict.clear() + info_dict.update(new_info) + try: - info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) + new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) + replace_info_dict(new_info) except PostProcessingError as err: self.report_error('Preprocessing: %s' % str(err)) return - must_record_download_archive = False - if self.params.get('skip_download', False): + if self.params.get('skip_download'): info_dict['filepath'] = temp_filename info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) info_dict['__files_to_move'] = files_to_move - info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict) + replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)) + info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') else: # Download info_dict.setdefault('__postprocessors', []) try: - def existing_file(*filepaths): + def existing_video_file(*filepaths): ext = info_dict.get('ext') - final_ext = self.params.get('final_ext', ext) - existing_files = [] - for file in orderedSet(filepaths): - if final_ext != ext: - converted = replace_extension(file, final_ext, ext) - if os.path.exists(encodeFilename(converted)): - existing_files.append(converted) - if os.path.exists(encodeFilename(file)): - existing_files.append(file) - - if not existing_files or self.params.get('overwrites', False): - for file in orderedSet(existing_files): - self.report_file_delete(file) - os.remove(encodeFilename(file)) - return None - - info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:] - return existing_files[0] + converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext) + file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)), + default_overwrite=False) + if file: + info_dict['ext'] = os.path.splitext(file)[1][1:] + return file success = True if info_dict.get('requested_formats') is not None: @@ -2923,7 +3016,7 @@ def correct_ext(filename, ext=new_ext): # Ensure filename always has a correct extension for successful merge full_filename = correct_ext(full_filename) temp_filename = correct_ext(temp_filename) - dl_filename = existing_file(full_filename, temp_filename) + dl_filename = existing_video_file(full_filename, temp_filename) info_dict['__real_download'] = False downloaded = [] @@ -2986,7 +3079,7 @@ def correct_ext(filename, ext=new_ext): files_to_move[file] = None else: # Just a single file - dl_filename = existing_file(full_filename, temp_filename) + dl_filename = existing_video_file(full_filename, temp_filename) if dl_filename is None or dl_filename == temp_filename: # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part. # So we should try to resume the download @@ -3063,7 +3156,7 @@ def ffmpeg_fixup(cndn, msg, cls): fixup() try: - info_dict = self.post_process(dl_filename, info_dict, files_to_move) + replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move)) except PostProcessingError as err: self.report_error('Postprocessing: %s' % str(err)) return @@ -3073,10 +3166,14 @@ def ffmpeg_fixup(cndn, msg, cls): except Exception as err: self.report_error('post hooks: %s' % str(err)) return - must_record_download_archive = True + info_dict['__write_download_archive'] = True + + if self.params.get('force_write_download_archive'): + info_dict['__write_download_archive'] = True + + # Make sure the info_dict was modified in-place + assert info_dict is original_infodict - if must_record_download_archive or self.params.get('force_write_download_archive', False): - self.record_download_archive(info_dict) max_downloads = self.params.get('max_downloads') if max_downloads is not None and self._num_downloads >= int(max_downloads): raise MaxDownloadsReached() @@ -3142,12 +3239,13 @@ def sanitize_info(info_dict, remove_private_keys=False): if info_dict is None: return info_dict info_dict.setdefault('epoch', int(time.time())) + info_dict.setdefault('_type', 'video') remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict keep_keys = ['_type'] # Always keep this to facilitate load-info-json if remove_private_keys: remove_keys |= { - 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries', - 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', + 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', + 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', } reject = lambda k, v: k not in keep_keys and ( k.startswith('_') or k in remove_keys or v is None) @@ -3171,6 +3269,25 @@ def filter_requested_info(info_dict, actually_filter=True): ''' Alias of sanitize_info for backward compatibility ''' return YoutubeDL.sanitize_info(info_dict, actually_filter) + @staticmethod + def post_extract(info_dict): + def actual_post_extract(info_dict): + if info_dict.get('_type') in ('playlist', 'multi_video'): + for video_dict in info_dict.get('entries', {}): + actual_post_extract(video_dict or {}) + return + + post_extractor = info_dict.get('__post_extractor') or (lambda: {}) + extra = post_extractor().items() + info_dict.update(extra) + info_dict.pop('__post_extractor', None) + + original_infodict = info_dict.get('__original_infodict') or {} + original_infodict.update(extra) + original_infodict.pop('__post_extractor', None) + + actual_post_extract(info_dict or {}) + def run_pp(self, pp, infodict): files_to_delete = [] if '__files_to_move' not in infodict: @@ -3200,45 +3317,26 @@ def run_pp(self, pp, infodict): del infodict['__files_to_move'][old_filename] return infodict - @staticmethod - def post_extract(info_dict): - def actual_post_extract(info_dict): - if info_dict.get('_type') in ('playlist', 'multi_video'): - for video_dict in info_dict.get('entries', {}): - actual_post_extract(video_dict or {}) - return - - post_extractor = info_dict.get('__post_extractor') or (lambda: {}) - extra = post_extractor().items() - info_dict.update(extra) - info_dict.pop('__post_extractor', None) - - original_infodict = info_dict.get('__original_infodict') or {} - original_infodict.update(extra) - original_infodict.pop('__post_extractor', None) - - actual_post_extract(info_dict or {}) + def run_all_pps(self, key, info, *, additional_pps=None): + self._forceprint(key, info) + for pp in (additional_pps or []) + self._pps[key]: + info = self.run_pp(pp, info) + return info def pre_process(self, ie_info, key='pre_process', files_to_move=None): info = dict(ie_info) info['__files_to_move'] = files_to_move or {} - for pp in self._pps[key]: - info = self.run_pp(pp, info) + info = self.run_all_pps(key, info) return info, info.pop('__files_to_move', None) - def post_process(self, filename, ie_info, files_to_move=None): + def post_process(self, filename, info, files_to_move=None): """Run all the postprocessors on the given file.""" - info = dict(ie_info) info['filepath'] = filename info['__files_to_move'] = files_to_move or {} - - for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']: - info = self.run_pp(pp, info) + info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors')) info = self.run_pp(MoveFilesAfterDownloadPP(self), info) del info['__files_to_move'] - for pp in self._pps['after_move']: - info = self.run_pp(pp, info) - return info + return self.run_all_pps('after_move', info) def _make_archive_id(self, info_dict): video_id = info_dict.get('id') @@ -3277,6 +3375,7 @@ def record_download_archive(self, info_dict): return vid_id = self._make_archive_id(info_dict) assert vid_id + self.write_debug(f'Adding to archive: {vid_id}') with locked_file(fn, 'a', encoding='utf-8') as archive_file: archive_file.write(vid_id + '\n') self.archive.add(vid_id) @@ -3295,6 +3394,11 @@ def format_resolution(format, default='unknown'): return '%dx?' % format['width'] return default + def _list_format_headers(self, *headers): + if self.params.get('listformats_table', True) is not False: + return [self._format_screen(header, self.Styles.HEADERS) for header in headers] + return headers + def _format_note(self, fdict): res = '' if fdict.get('ext') in ['f4f', 'f4m']: @@ -3355,102 +3459,97 @@ def _format_note(self, fdict): res += '~' + format_bytes(fdict['filesize_approx']) return res - def _list_format_headers(self, *headers): - if self.params.get('listformats_table', True) is not False: - return [self._format_screen(header, self.Styles.HEADERS) for header in headers] - return headers - - def list_formats(self, info_dict): + def render_formats_table(self, info_dict): if not info_dict.get('formats') and not info_dict.get('url'): - self.to_screen('%s has no formats' % info_dict['id']) - return - self.to_screen('[info] Available formats for %s:' % info_dict['id']) + return None formats = info_dict.get('formats', [info_dict]) - new_format = self.params.get('listformats_table', True) is not False - if new_format: - delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True) - table = [ - [ - self._format_screen(format_field(f, 'format_id'), self.Styles.ID), - format_field(f, 'ext'), - format_field(f, func=self.format_resolution, ignore=('audio only', 'images')), - format_field(f, 'fps', '\t%d'), - format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), - delim, - format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes), - format_field(f, 'tbr', '\t%dk'), - shorten_protocol_name(f.get('protocol', '')), - delim, - format_field(f, 'vcodec', default='unknown').replace( - 'none', - 'images' if f.get('acodec') == 'none' - else self._format_screen('audio only', self.Styles.SUPPRESS)), - format_field(f, 'vbr', '\t%dk'), - format_field(f, 'acodec', default='unknown').replace( - 'none', - '' if f.get('vcodec') == 'none' - else self._format_screen('video only', self.Styles.SUPPRESS)), - format_field(f, 'abr', '\t%dk'), - format_field(f, 'asr', '\t%dHz'), - join_nonempty( - self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, - format_field(f, 'language', '[%s]'), - join_nonempty( - format_field(f, 'format_note'), - format_field(f, 'container', ignore=(None, f.get('ext'))), - delim=', '), - delim=' '), - ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] - header_line = self._list_format_headers( - 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO', - delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO') - else: + if not self.params.get('listformats_table', True) is not False: table = [ [ format_field(f, 'format_id'), format_field(f, 'ext'), self.format_resolution(f), - self._format_note(f)] - for f in formats - if f.get('preference') is None or f['preference'] >= -1000] - header_line = ['format code', 'extension', 'resolution', 'note'] - - self.to_stdout(render_table( - header_line, table, - extra_gap=(0 if new_format else 1), - hide_empty=new_format, - delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))) - - def list_thumbnails(self, info_dict): - thumbnails = list(info_dict.get('thumbnails')) + self._format_note(f) + ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] + return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) + + delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True) + table = [ + [ + self._format_screen(format_field(f, 'format_id'), self.Styles.ID), + format_field(f, 'ext'), + format_field(f, func=self.format_resolution, ignore=('audio only', 'images')), + format_field(f, 'fps', '\t%d'), + format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), + delim, + format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes), + format_field(f, 'tbr', '\t%dk'), + shorten_protocol_name(f.get('protocol', '')), + delim, + format_field(f, 'vcodec', default='unknown').replace( + 'none', 'images' if f.get('acodec') == 'none' + else self._format_screen('audio only', self.Styles.SUPPRESS)), + format_field(f, 'vbr', '\t%dk'), + format_field(f, 'acodec', default='unknown').replace( + 'none', '' if f.get('vcodec') == 'none' + else self._format_screen('video only', self.Styles.SUPPRESS)), + format_field(f, 'abr', '\t%dk'), + format_field(f, 'asr', '\t%dHz'), + join_nonempty( + self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, + format_field(f, 'language', '[%s]'), + join_nonempty(format_field(f, 'format_note'), + format_field(f, 'container', ignore=(None, f.get('ext'))), + delim=', '), + delim=' '), + ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] + header_line = self._list_format_headers( + 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO', + delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO') + + return render_table( + header_line, table, hide_empty=True, + delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)) + + def render_thumbnails_table(self, info_dict): + thumbnails = list(info_dict.get('thumbnails') or []) if not thumbnails: - self.to_screen('[info] No thumbnails present for %s' % info_dict['id']) - return - - self.to_screen( - '[info] Thumbnails for %s:' % info_dict['id']) - self.to_stdout(render_table( + return None + return render_table( self._list_format_headers('ID', 'Width', 'Height', 'URL'), - [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) - - def list_subtitles(self, video_id, subtitles, name='subtitles'): - if not subtitles: - self.to_screen('%s has no %s' % (video_id, name)) - return - self.to_screen( - 'Available %s for %s:' % (name, video_id)) + [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]) + def render_subtitles_table(self, video_id, subtitles): def _row(lang, formats): exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats))) if len(set(names)) == 1: names = [] if names[0] == 'unknown' else names[:1] return [lang, ', '.join(names), ', '.join(exts)] - self.to_stdout(render_table( + if not subtitles: + return None + return render_table( self._list_format_headers('Language', 'Name', 'Formats'), [_row(lang, formats) for lang, formats in subtitles.items()], - hide_empty=True)) + hide_empty=True) + + def __list_table(self, video_id, name, func, *args): + table = func(*args) + if not table: + self.to_screen(f'{video_id} has no {name}') + return + self.to_screen(f'[info] Available {name} for {video_id}:') + self.to_stdout(table) + + def list_formats(self, info_dict): + self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict) + + def list_thumbnails(self, info_dict): + self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict) + + def list_subtitles(self, video_id, subtitles, name='subtitles'): + self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles) def urlopen(self, req): """ Start an HTTP download """ @@ -3699,10 +3798,11 @@ def _write_subtitles(self, info_dict, filename): sub_format = sub_info['ext'] sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext')) - if not self.params.get('overwrites', True) and os.path.exists(sub_filename): + existing_sub = self.existing_file((sub_filename_final, sub_filename)) + if existing_sub: self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present') - sub_info['filepath'] = sub_filename - ret.append((sub_filename, sub_filename_final)) + sub_info['filepath'] = existing_sub + ret.append((existing_sub, sub_filename_final)) continue self.to_screen(f'[info] Writing video subtitles to: {sub_filename}') @@ -3725,9 +3825,10 @@ def _write_subtitles(self, info_dict, filename): self.dl(sub_filename, sub_copy, subtitle=True) sub_info['filepath'] = sub_filename ret.append((sub_filename, sub_filename_final)) - except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: + except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: + if self.params.get('ignoreerrors') is not True: # False or 'only_download' + raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err) self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}') - continue return ret def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): @@ -3750,11 +3851,12 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) - if not self.params.get('overwrites', True) and os.path.exists(thumb_filename): - ret.append((thumb_filename, thumb_filename_final)) - t['filepath'] = thumb_filename + existing_thumb = self.existing_file((thumb_filename_final, thumb_filename)) + if existing_thumb: self.to_screen('[info] %s is already present' % ( thumb_display_id if multiple else f'{label} thumbnail').capitalize()) + t['filepath'] = existing_thumb + ret.append((existing_thumb, thumb_filename_final)) else: self.to_screen(f'[info] Downloading {thumb_display_id} ...') try: