X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/aebb4f4ba78ec7542416832e9dd5e47788cb12aa..c8bc203fbf3bb09914e53f0833eed622ab7edbb9:/yt_dlp/YoutubeDL.py diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index e1c24b892..8f52a71a9 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -21,7 +21,7 @@ import traceback import unicodedata import urllib.request -from string import ascii_letters +from string import Formatter, ascii_letters from .cache import Cache from .compat import compat_os_name, compat_shlex_quote @@ -32,7 +32,8 @@ from .extractor.common import UnsupportedURLIE from .extractor.openload import PhantomJSwrapper from .minicurses import format_text -from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors +from .plugins import directories as plugin_directories +from .postprocessor import _PLUGIN_CLASSES as plugin_pps from .postprocessor import ( EmbedThumbnailPP, FFmpegFixupDuplicateMoovPP, @@ -67,6 +68,7 @@ EntryNotInPlaylist, ExistingVideoReached, ExtractorError, + FormatSorter, GeoRestrictedError, HEADRequest, ISO3166Utils, @@ -148,7 +150,7 @@ write_json_file, write_string, ) -from .version import RELEASE_GIT_HEAD, VARIANT, __version__ +from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__ if compat_os_name == 'nt': import ctypes @@ -188,6 +190,7 @@ class YoutubeDL: ap_username: Multiple-system operator account username. ap_password: Multiple-system operator account password. usenetrc: Use netrc for authentication instead. + netrc_location: Location of the netrc file. Defaults to ~/.netrc. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. no_warnings: Do not print out anything for warnings. @@ -298,8 +301,6 @@ class YoutubeDL: Videos already present in the file are not downloaded again. break_on_existing: Stop the download process after attempting to download a file that is in the archive. - break_on_reject: Stop the download process when encountering a video that - has been filtered out. break_per_url: Whether break_on_reject and break_on_existing should act on each input URL as opposed to for the entire queue cookiefile: File name or text stream from where cookies should be read and dumped to @@ -316,6 +317,7 @@ class YoutubeDL: If not provided and the key is encrypted, yt-dlp will ask interactively prefer_insecure: Use HTTP instead of HTTPS to retrieve information. (Only supported by some extractors) + enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons. http_headers: A dictionary of custom headers to be used for all requests proxy: URL of the proxy server to use geo_verification_proxy: URL of the proxy to use for IP address verification @@ -411,6 +413,8 @@ class YoutubeDL: - If it returns None, the video is downloaded. - If it returns utils.NO_DEFAULT, the user is interactively asked whether to download the video. + - Raise utils.DownloadCancelled(msg) to abort remaining + downloads when a video is rejected. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. geo_bypass: Bypass geographic restriction via faking X-Forwarded-For @@ -480,6 +484,9 @@ class YoutubeDL: The following options are deprecated and may be removed in the future: + break_on_reject: Stop the download process when encountering a video that + has been filtered out. + - `raise DownloadCancelled(msg)` in match_filter instead force_generic_extractor: Force downloader to use the generic extractor - Use allowed_extractors = ['generic', 'default'] playliststart: - Use playlist_items @@ -547,11 +554,11 @@ class YoutubeDL: _format_fields = { # NB: Keep in sync with the docstring of extractor/common.py 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note', - 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels', - 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', + 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels', + 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'preference', 'language', 'language_preference', 'quality', 'source_preference', - 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options', + 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' } _format_selection_exts = { @@ -583,7 +590,6 @@ def __init__(self, params=None, auto_init=True): self._playlist_urls = set() self.cache = Cache(self) - windows_enable_vt_mode() stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout self._out_files = Namespace( out=stdout, @@ -592,6 +598,12 @@ def __init__(self, params=None, auto_init=True): console=None if compat_os_name == 'nt' else next( filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None) ) + + try: + windows_enable_vt_mode() + except Exception as e: + self.write_debug(f'Failed to enable VT mode: {e}') + self._allow_colors = Namespace(**{ type_: not self.params.get('no_color') and supports_terminal_sequences(stream) for type_, stream in self._out_files.items_ if type_ != 'console' @@ -606,7 +618,7 @@ def __init__(self, params=None, auto_init=True): '\n You will no longer receive updates on this version') if current_version < MIN_SUPPORTED: msg = 'Python version %d.%d is no longer supported' - self.deprecation_warning( + self.deprecated_feature( f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED)) if self.params.get('allow_unplayable_formats'): @@ -616,6 +628,30 @@ def __init__(self, params=None, auto_init=True): ' If you experience any issues while using this option, ' f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report') + if self.params.get('bidi_workaround', False): + try: + import pty + master, slave = pty.openpty() + width = shutil.get_terminal_size().columns + width_args = [] if width is None else ['-w', str(width)] + sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error} + try: + self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) + except OSError: + self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) + self._output_channel = os.fdopen(master, 'rb') + except OSError as ose: + if ose.errno == errno.ENOENT: + self.report_warning( + 'Could not find fribidi executable, ignoring --bidi-workaround. ' + 'Make sure that fribidi is an executable file in one of the directories in your $PATH.') + else: + raise + + self.params['compat_opts'] = set(self.params.get('compat_opts', ())) + if auto_init and auto_init != 'no_verbose_header': + self.print_debug_header() + def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: self.report_warning(f'{option} is deprecated. Use {suggestion} instead') @@ -635,7 +671,6 @@ def check_deprecated(param, option, suggestion): for msg in self.params.get('_deprecation_warnings', []): self.deprecated_feature(msg) - self.params['compat_opts'] = set(self.params.get('compat_opts', ())) if 'list-formats' in self.params['compat_opts']: self.params['listformats_table'] = False @@ -649,6 +684,13 @@ def check_deprecated(param, option, suggestion): else: self.params['nooverwrites'] = not self.params['overwrites'] + if self.params.get('simulate') is None and any(( + self.params.get('list_thumbnails'), + self.params.get('listformats'), + self.params.get('listsubtitles'), + )): + self.params['simulate'] = 'list_only' + self.params.setdefault('forceprint', {}) self.params.setdefault('print_to_file', {}) @@ -656,29 +698,7 @@ def check_deprecated(param, option, suggestion): if not isinstance(params['forceprint'], dict): self.params['forceprint'] = {'video': params['forceprint']} - if self.params.get('bidi_workaround', False): - try: - import pty - master, slave = pty.openpty() - width = shutil.get_terminal_size().columns - width_args = [] if width is None else ['-w', str(width)] - sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error} - try: - self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) - except OSError: - self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) - self._output_channel = os.fdopen(master, 'rb') - except OSError as ose: - if ose.errno == errno.ENOENT: - self.report_warning( - 'Could not find fribidi executable, ignoring --bidi-workaround. ' - 'Make sure that fribidi is an executable file in one of the directories in your $PATH.') - else: - raise - if auto_init: - if auto_init != 'no_verbose_header': - self.print_debug_header() self.add_default_info_extractors() if (sys.platform != 'win32' @@ -1059,7 +1079,7 @@ def _outtmpl_expandpath(outtmpl): # correspondingly that is not what we want since we need to keep # '%%' intact for template dict substitution step. Working around # with boundary-alike separator hack. - sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) + sep = ''.join(random.choices(ascii_letters, k=32)) outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$') # outtmpl should be expand_path'ed before template dict substitution @@ -1137,7 +1157,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): } MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})' MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys())) - INTERNAL_FORMAT_RE = re.compile(rf'''(?x) + INTERNAL_FORMAT_RE = re.compile(rf'''(?xs) (?P-)? (?P{FIELD_RE}) (?P(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*) @@ -1218,6 +1238,14 @@ def _dumpjson_default(obj): return list(obj) return repr(obj) + class _ReplacementFormatter(Formatter): + def get_field(self, field_name, args, kwargs): + if field_name.isdigit(): + return args[0], -1 + raise ValueError('Unsupported field') + + replacement_formatter = _ReplacementFormatter() + def create_key(outer_mobj): if not outer_mobj.group('has_key'): return outer_mobj.group(0) @@ -1239,7 +1267,13 @@ def create_key(outer_mobj): if fmt == 's' and value is not None and key in field_size_compat_map.keys(): fmt = f'0{field_size_compat_map[key]:d}d' - value = default if value is None else value if replacement is None else replacement + if value is None: + value = default + elif replacement is not None: + try: + value = replacement_formatter.format(replacement, value) + except ValueError: + value = na flags = outer_mobj.group('conversion') or '' str_fmt = f'{fmt[:-1]}s' @@ -1249,7 +1283,7 @@ def create_key(outer_mobj): elif fmt[-1] == 'j': # json value, fmt = json.dumps( value, default=_dumpjson_default, - indent=4 if '#' in flags else None, ensure_ascii=False), str_fmt + indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt elif fmt[-1] == 'h': # html value, fmt = escapeHTML(str(value)), str_fmt elif fmt[-1] == 'q': # quoted @@ -1349,11 +1383,19 @@ def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False): return self.get_output_path(dir_type, filename) def _match_entry(self, info_dict, incomplete=False, silent=False): - """ Returns None if the file should be downloaded """ + """Returns None if the file should be downloaded""" + _type = info_dict.get('_type', 'video') + assert incomplete or _type == 'video', 'Only video result can be considered complete' video_title = info_dict.get('title', info_dict.get('id', 'entry')) def check_filter(): + if _type in ('playlist', 'multi_video'): + return + elif _type in ('url', 'url_transparent') and not try_call( + lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])): + return + if 'title' in info_dict: # This can happen when we're just evaluating the playlist title = info_dict['title'] @@ -1365,6 +1407,7 @@ def check_filter(): if rejecttitle: if re.search(rejecttitle, title, re.IGNORECASE): return '"' + title + '" title matched reject pattern "' + rejecttitle + '"' + date = info_dict.get('upload_date') if date is not None: dateRange = self.params.get('daterange', DateRange()) @@ -1382,31 +1425,44 @@ def check_filter(): return 'Skipping "%s" because it is age restricted' % video_title match_filter = self.params.get('match_filter') - if match_filter is not None: + if match_filter is None: + return None + + cancelled = None + try: try: ret = match_filter(info_dict, incomplete=incomplete) except TypeError: # For backward compatibility ret = None if incomplete else match_filter(info_dict) - if ret is NO_DEFAULT: - while True: - filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME) - reply = input(self._format_screen( - f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip() - if reply in {'y', ''}: - return None - elif reply == 'n': - return f'Skipping {video_title}' - elif ret is not None: - return ret - return None + except DownloadCancelled as err: + if err.msg is not NO_DEFAULT: + raise + ret, cancelled = err.msg, err + + if ret is NO_DEFAULT: + while True: + filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME) + reply = input(self._format_screen( + f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip() + if reply in {'y', ''}: + return None + elif reply == 'n': + if cancelled: + raise type(cancelled)(f'Skipping {video_title}') + return f'Skipping {video_title}' + return ret if self.in_download_archive(info_dict): reason = '%s has already been recorded in the archive' % video_title break_opt, break_err = 'break_on_existing', ExistingVideoReached else: - reason = check_filter() - break_opt, break_err = 'break_on_reject', RejectedVideoReached + try: + reason = check_filter() + except DownloadCancelled as e: + reason, break_opt, break_err = e.msg, 'match_filter', type(e) + else: + break_opt, break_err = 'break_on_reject', RejectedVideoReached if reason is not None: if not silent: self.to_screen('[download] ' + reason) @@ -1608,8 +1664,8 @@ def process_ie_result(self, ie_result, download=True, extra_info=None): if result_type in ('url', 'url_transparent'): ie_result['url'] = sanitize_url( ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https') - if ie_result.get('original_url'): - extra_info.setdefault('original_url', ie_result['original_url']) + if ie_result.get('original_url') and not extra_info.get('original_url'): + extra_info = {'original_url': ie_result['original_url'], **extra_info} extract_flat = self.params.get('extract_flat', False) if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) @@ -1621,7 +1677,8 @@ def process_ie_result(self, ie_result, download=True, extra_info=None): self.add_default_extra_info(info_copy, ie, ie_result['url']) self.add_extra_info(info_copy, extra_info) info_copy, _ = self.pre_process(info_copy) - self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True) + self._fill_common_fields(info_copy, False) + self.__forced_printings(info_copy) self._raise_pending_errors(info_copy) if self.params.get('force_write_download_archive', False): self.record_download_archive(info_copy) @@ -1751,7 +1808,7 @@ def _playlist_infodict(ie_result, strict=False, **kwargs): return { **info, 'playlist_index': 0, - '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)), + '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)), 'extractor': ie_result['extractor'], 'extractor_key': ie_result['extractor_key'], } @@ -1807,7 +1864,7 @@ def __process_playlist(self, ie_result, download): elif self.params.get('playlistrandom'): random.shuffle(entries) - self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos' + self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items' f'{format_field(ie_result, "playlist_count", " of %s")}') keep_resolved_entries = self.params.get('extract_flat') != 'discard' @@ -1840,14 +1897,13 @@ def __process_playlist(self, ie_result, download): resolved_entries[i] = (playlist_index, NO_DEFAULT) continue - self.to_screen('[download] Downloading video %s of %s' % ( + self.to_screen('[download] Downloading item %s of %s' % ( self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS))) - extra.update({ + entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({ 'playlist_index': playlist_index, 'playlist_autonumber': i + 1, - }) - entry_result = self.__process_iterable_entry(entry, download, extra) + }, extra)) if not entry_result: failures += 1 if failures >= max_failures: @@ -1858,8 +1914,11 @@ def __process_playlist(self, ie_result, download): resolved_entries[i] = (playlist_index, entry_result) # Update with processed data - ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT] ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT] + ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT] + if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))): + # Do not set for full playlist + ie_result.pop('requested_entries') # Write the updated info to json if _infojson_written is True and self._write_info_json( @@ -1888,7 +1947,7 @@ def _build_format_filter(self, filter_spec): '!=': operator.ne, } operator_rex = re.compile(r'''(?x)\s* - (?Pwidth|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s* + (?P[\w.-]+)\s* (?P%s)(?P\s*\?)?\s* (?P[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s* ''' % '|'.join(map(re.escape, OPERATORS.keys()))) @@ -2165,6 +2224,7 @@ def _merge(formats_pair): 'vcodec': the_only_video.get('vcodec'), 'vbr': the_only_video.get('vbr'), 'stretched_ratio': the_only_video.get('stretched_ratio'), + 'aspect_ratio': the_only_video.get('aspect_ratio'), }) if the_only_audio: @@ -2379,15 +2439,10 @@ def check_thumbnails(thumbnails): else: info_dict['thumbnails'] = thumbnails - def _fill_common_fields(self, info_dict, is_video=True): + def _fill_common_fields(self, info_dict, final=True): # TODO: move sanitization here - if is_video: - # playlists are allowed to lack "title" - title = info_dict.get('title', NO_DEFAULT) - if title is NO_DEFAULT: - raise ExtractorError('Missing "title" field in extractor result', - video_id=info_dict['id'], ie=info_dict['extractor']) - info_dict['fulltitle'] = title + if final: + title = info_dict['fulltitle'] = info_dict.get('title') if not title: if title == '': self.write_debug('Extractor gave empty title. Creating a generic title') @@ -2432,7 +2487,7 @@ def _fill_common_fields(self, info_dict, is_video=True): # Auto generate title fields corresponding to the *_number fields when missing # in order to always have clean titles. This is very common for TV series. for field in ('chapter', 'season', 'episode'): - if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): + if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field): info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) def _raise_pending_errors(self, info): @@ -2440,6 +2495,11 @@ def _raise_pending_errors(self, info): if err: self.report_error(err, tb=False) + def sort_formats(self, info_dict): + formats = self._get_formats(info_dict) + formats.sort(key=FormatSorter( + self, info_dict.get('_format_sort_fields') or []).calculate_preference) + def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' self._num_videos += 1 @@ -2527,6 +2587,11 @@ def sanitize_numeric_fields(info): formats = self._get_formats(info_dict) + # Backward compatibility with InfoExtractor._sort_formats + field_preference = (formats or [{}])[0].pop('__sort_fields', None) + if field_preference: + info_dict['_format_sort_fields'] = field_preference + # or None ensures --clean-infojson removes it info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None if not self.params.get('allow_unplayable_formats'): @@ -2564,22 +2629,43 @@ def is_wellformed(f): if not formats: self.raise_no_formats(info_dict) - formats_dict = {} - - # We check that all the formats have the format and format_id fields - for i, format in enumerate(formats): + for format in formats: sanitize_string_field(format, 'format_id') sanitize_numeric_fields(format) format['url'] = sanitize_url(format['url']) + if format.get('ext') is None: + format['ext'] = determine_ext(format['url']).lower() + if format.get('protocol') is None: + format['protocol'] = determine_protocol(format) + if format.get('resolution') is None: + format['resolution'] = self.format_resolution(format, default=None) + if format.get('dynamic_range') is None and format.get('vcodec') != 'none': + format['dynamic_range'] = 'SDR' + if format.get('aspect_ratio') is None: + format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2)) + if (info_dict.get('duration') and format.get('tbr') + and not format.get('filesize') and not format.get('filesize_approx')): + format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8)) + format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict)) + + # This is copied to http_headers by the above _calc_headers and can now be removed + if '__x_forwarded_for_ip' in info_dict: + del info_dict['__x_forwarded_for_ip'] + + self.sort_formats({ + 'formats': formats, + '_format_sort_fields': info_dict.get('_format_sort_fields') + }) + + # Sanitize and group by format_id + formats_dict = {} + for i, format in enumerate(formats): if not format.get('format_id'): format['format_id'] = str(i) else: # Sanitize format_id from characters used in format selector expression format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id']) - format_id = format['format_id'] - if format_id not in formats_dict: - formats_dict[format_id] = [] - formats_dict[format_id].append(format) + formats_dict.setdefault(format['format_id'], []).append(format) # Make sure all formats have unique format_id common_exts = set(itertools.chain(*self._format_selection_exts.values())) @@ -2588,38 +2674,17 @@ def is_wellformed(f): for i, format in enumerate(ambiguous_formats): if ambigious_id: format['format_id'] = '%s-%d' % (format_id, i) - if format.get('ext') is None: - format['ext'] = determine_ext(format['url']).lower() # Ensure there is no conflict between id and ext in format selection # See https://github.com/yt-dlp/yt-dlp/issues/1282 if format['format_id'] != format['ext'] and format['format_id'] in common_exts: format['format_id'] = 'f%s' % format['format_id'] - for i, format in enumerate(formats): - if format.get('format') is None: - format['format'] = '{id} - {res}{note}'.format( - id=format['format_id'], - res=self.format_resolution(format), - note=format_field(format, 'format_note', ' (%s)'), - ) - if format.get('protocol') is None: - format['protocol'] = determine_protocol(format) - if format.get('resolution') is None: - format['resolution'] = self.format_resolution(format, default=None) - if format.get('dynamic_range') is None and format.get('vcodec') != 'none': - format['dynamic_range'] = 'SDR' - if (info_dict.get('duration') and format.get('tbr') - and not format.get('filesize') and not format.get('filesize_approx')): - format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8)) - - # Add HTTP headers, so that external programs can use them from the - # json output - full_format_info = info_dict.copy() - full_format_info.update(format) - format['http_headers'] = self._calc_headers(full_format_info) - # Remove private housekeeping stuff - if '__x_forwarded_for_ip' in info_dict: - del info_dict['__x_forwarded_for_ip'] + if format.get('format') is None: + format['format'] = '{id} - {res}{note}'.format( + id=format['format_id'], + res=self.format_resolution(format), + note=format_field(format, 'format_note', ' (%s)'), + ) if self.params.get('check_formats') is True: formats = LazyList(self._check_formats(formats[::-1]), reverse=True) @@ -2642,8 +2707,7 @@ def is_wellformed(f): # The pre-processors may have modified the formats formats = self._get_formats(info_dict) - list_only = self.params.get('simulate') is None and ( - self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')) + list_only = self.params.get('simulate') == 'list_only' interactive_format_selection = not list_only and self.format_selector == '-' if self.params.get('list_thumbnails'): self.list_thumbnails(info_dict) @@ -2656,7 +2720,7 @@ def is_wellformed(f): self.list_formats(info_dict) if list_only: # Without this printing, -F --print-json will not work - self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True) + self.__forced_printings(info_dict) return info_dict format_selector = self.format_selector @@ -2720,7 +2784,8 @@ def to_screen(*msg): if chapter or offset: new_info.update({ 'section_start': offset + chapter.get('start_time', 0), - 'section_end': end_time if end_time < offset + duration else None, + # duration may not be accurate. So allow deviations <1sec + 'section_end': end_time if end_time <= offset + duration + 1 else None, 'section_title': chapter.get('title'), 'section_number': chapter.get('index'), }) @@ -2776,10 +2841,14 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions): self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True) except re.error as e: raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}') - elif normal_sub_langs: - requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1] else: - requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1] + requested_langs = LazyList(itertools.chain( + ['en'] if 'en' in normal_sub_langs else [], + filter(lambda f: f.startswith('en'), normal_sub_langs), + ['en'] if 'en' in all_sub_langs else [], + filter(lambda f: f.startswith('en'), all_sub_langs), + normal_sub_langs, all_sub_langs, + ))[:1] if requested_langs: self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}') @@ -2811,6 +2880,12 @@ def _forceprint(self, key, info_dict): if info_dict is None: return info_copy = info_dict.copy() + info_copy.setdefault('filename', self.prepare_filename(info_dict)) + if info_dict.get('requested_formats') is not None: + # For RTMP URLs, also include the playpath + info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats']) + elif info_dict.get('url'): + info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '') info_copy['formats_table'] = self.render_formats_table(info_dict) info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict) info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles')) @@ -2836,46 +2911,36 @@ def format_tmpl(tmpl): tmpl = format_tmpl(tmpl) self.to_screen(f'[info] Writing {tmpl!r} to: {filename}') if self._ensure_dir_exists(filename): - with open(filename, 'a', encoding='utf-8') as f: - f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n') + with open(filename, 'a', encoding='utf-8', newline='') as f: + f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep) - def __forced_printings(self, info_dict, filename, incomplete): - def print_mandatory(field, actual_field=None): - if actual_field is None: - actual_field = field - if (self.params.get('force%s' % field, False) - and (not incomplete or info_dict.get(actual_field) is not None)): - self.to_stdout(info_dict[actual_field]) - - def print_optional(field): - if (self.params.get('force%s' % field, False) - and info_dict.get(field) is not None): - self.to_stdout(info_dict[field]) - - info_dict = info_dict.copy() - if filename is not None: - info_dict['filename'] = filename - if info_dict.get('requested_formats') is not None: - # For RTMP URLs, also include the playpath - info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats']) - elif info_dict.get('url'): - info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '') + return info_copy + def __forced_printings(self, info_dict, filename=None, incomplete=True): if (self.params.get('forcejson') or self.params['forceprint'].get('video') or self.params['print_to_file'].get('video')): self.post_extract(info_dict) - self._forceprint('video', info_dict) - - print_mandatory('title') - print_mandatory('id') - print_mandatory('url', 'urls') - print_optional('thumbnail') - print_optional('description') - print_optional('filename') - if self.params.get('forceduration') and info_dict.get('duration') is not None: - self.to_stdout(formatSeconds(info_dict['duration'])) - print_mandatory('format') + if filename: + info_dict['filename'] = filename + info_copy = self._forceprint('video', info_dict) + + def print_field(field, actual_field=None, optional=False): + if actual_field is None: + actual_field = field + if self.params.get(f'force{field}') and ( + info_copy.get(field) is not None or (not optional and not incomplete)): + self.to_stdout(info_copy[actual_field]) + + print_field('title') + print_field('id') + print_field('url', 'urls') + print_field('thumbnail', optional=True) + print_field('description', optional=True) + print_field('filename', optional=True) + if self.params.get('forceduration') and info_copy.get('duration') is not None: + self.to_stdout(formatSeconds(info_copy['duration'])) + print_field('format') if self.params.get('forcejson'): self.to_stdout(json.dumps(self.sanitize_info(info_dict))) @@ -2934,14 +2999,22 @@ def process_info(self, info_dict): if 'format' not in info_dict and 'ext' in info_dict: info_dict['format'] = info_dict['ext'] - # This is mostly just for backward compatibility of process_info - # As a side-effect, this allows for format-specific filters if self._match_entry(info_dict) is not None: info_dict['__write_download_archive'] = 'ignore' return # Does nothing under normal operation - for backward compatibility of process_info self.post_extract(info_dict) + + def replace_info_dict(new_info): + nonlocal info_dict + if new_info == info_dict: + return + info_dict.clear() + info_dict.update(new_info) + + new_info, _ = self.pre_process(info_dict, 'video') + replace_info_dict(new_info) self._num_downloads += 1 # info_dict['_filename'] needs to be set for backward compatibility @@ -3055,13 +3128,6 @@ def _write_link_file(link_type): for link_type, should_write in write_links.items()): return - def replace_info_dict(new_info): - nonlocal info_dict - if new_info == info_dict: - return - info_dict.clear() - info_dict.update(new_info) - new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) replace_info_dict(new_info) @@ -3088,7 +3154,7 @@ def existing_video_file(*filepaths): fd, success = None, True if info_dict.get('protocol') or info_dict.get('url'): fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-') - if fd is not FFmpegFD and ( + if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and ( info_dict.get('section_start') or info_dict.get('section_end')): msg = ('This format cannot be partially downloaded' if FFmpegFD.available() else 'You have requested downloading the video partially, but ffmpeg is not installed') @@ -3257,7 +3323,7 @@ def ffmpeg_fixup(cndn, msg, cls): or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None, 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', FFmpegFixupM3u8PP) - ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD', + ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments', 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP) ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP) @@ -3321,18 +3387,19 @@ def download_with_info_file(self, info_filename): [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))) as f: # FileInput doesn't have a read method, we can't call json.load - info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True)) - try: - self.__download_wrapper(self.process_ie_result)(info, download=True) - except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: - if not isinstance(e, EntryNotInPlaylist): - self.to_stderr('\r') - webpage_url = info.get('webpage_url') - if webpage_url is not None: + infos = [self.sanitize_info(info, self.params.get('clean_infojson', True)) + for info in variadic(json.loads('\n'.join(f)))] + for info in infos: + try: + self.__download_wrapper(self.process_ie_result)(info, download=True) + except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: + if not isinstance(e, EntryNotInPlaylist): + self.to_stderr('\r') + webpage_url = info.get('webpage_url') + if webpage_url is None: + raise self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}') - return self.download([webpage_url]) - else: - raise + self.download([webpage_url]) return self._download_retcode @staticmethod @@ -3353,6 +3420,7 @@ def sanitize_info(info_dict, remove_private_keys=False): reject = lambda k, v: v is None or k.startswith('__') or k in { 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber', + '_format_sort_fields', } else: reject = lambda k, v: False @@ -3421,8 +3489,9 @@ def run_pp(self, pp, infodict): *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)') return infodict - def run_all_pps(self, key, info, *, additional_pps=None): - self._forceprint(key, info) + def run_all_pps(self, key, info, *, additional_pps=None, fatal=True): + if key != 'video': + self._forceprint(key, info) for pp in (additional_pps or []) + self._pps[key]: info = self.run_pp(pp, info) return info @@ -3585,7 +3654,7 @@ def render_formats_table(self, info_dict): format_field(f, 'ext'), self.format_resolution(f), self._format_note(f) - ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] + ] for f in formats if (f.get('preference') or 0) >= -1000] return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) def simplified_codec(f, field): @@ -3624,6 +3693,7 @@ def simplified_codec(f, field): format_field(f, 'asr', '\t%s', func=format_decimal_suffix), join_nonempty( self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, + self._format_out('DRM', 'light red') if f.get('has_drm') else None, format_field(f, 'language', '[%s]'), join_nonempty(format_field(f, 'format_note'), format_field(f, 'container', ignore=(None, f.get('ext'))), @@ -3691,7 +3761,10 @@ def print_debug_header(self): # These imports can be slow. So import them only as needed from .extractor.extractors import _LAZY_LOADER - from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors + from .extractor.extractors import ( + _PLUGIN_CLASSES as plugin_ies, + _PLUGIN_OVERRIDES as plugin_ie_overrides + ) def get_encoding(stream): ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) @@ -3720,22 +3793,23 @@ def get_encoding(stream): source = detect_variant() if VARIANT not in (None, 'pip'): source += '*' + klass = type(self) write_debug(join_nonempty( f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version', - __version__, - f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '', + f'{CHANNEL}@{__version__}', + f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '', '' if source == 'unknown' else f'({source})', - '' if _IN_CLI else 'API', + '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}', delim=' ')) + + if not _IN_CLI: + write_debug(f'params: {self.params}') + if not _LAZY_LOADER: if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): write_debug('Lazy loading extractors is forcibly disabled') else: write_debug('Lazy loading extractors is disabled') - if plugin_extractors or plugin_postprocessors: - write_debug('Plugins: %s' % [ - '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}') - for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) if self.params['compat_opts']: write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts'])) @@ -3769,6 +3843,21 @@ def get_encoding(stream): proxy_map.update(handler.proxies) write_debug(f'Proxy map: {proxy_map}') + for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): + display_list = ['%s%s' % ( + klass.__name__, '' if klass.__name__ == name else f' as {name}') + for name, klass in plugins.items()] + if plugin_type == 'Extractor': + display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})' + for parent, plugins in plugin_ie_overrides.items()) + if not display_list: + continue + write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}') + + plugin_dirs = plugin_directories() + if plugin_dirs: + write_debug(f'Plugin directories: {plugin_dirs}') + # Not implemented if False and self.params.get('call_home'): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() @@ -3818,9 +3907,12 @@ def _setup_opener(self): # https://github.com/ytdl-org/youtube-dl/issues/8227) file_handler = urllib.request.FileHandler() - def file_open(*args, **kwargs): - raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons') - file_handler.file_open = file_open + if not self.params.get('enable_file_urls'): + def file_open(*args, **kwargs): + raise urllib.error.URLError( + 'file:// URLs are explicitly disabled in yt-dlp for security reasons. ' + 'Use --enable-file-urls to enable at your own risk.') + file_handler.file_open = file_open opener = urllib.request.build_opener( proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler) @@ -3882,7 +3974,7 @@ def _write_description(self, label, ie_result, descfn): elif not self.params.get('overwrites', True) and os.path.exists(descfn): self.to_screen(f'[info] {label.title()} description is already present') elif ie_result.get('description') is None: - self.report_warning(f'There\'s no {label} description to write') + self.to_screen(f'[info] There\'s no {label} description to write') return False else: try: @@ -3898,15 +3990,18 @@ def _write_subtitles(self, info_dict, filename): ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' ret = [] subtitles = info_dict.get('requested_subtitles') - if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): + if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE return ret - + elif not subtitles: + self.to_screen('[info] There are no subtitles for the requested languages') + return ret sub_filename_base = self.prepare_filename(info_dict, 'subtitle') if not sub_filename_base: self.to_screen('[info] Skipping writing video subtitles') return ret + for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) @@ -3953,6 +4048,9 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None thumbnails, ret = [], [] if write_all or self.params.get('writethumbnail', False): thumbnails = info_dict.get('thumbnails') or [] + if not thumbnails: + self.to_screen(f'[info] There are no {label} thumbnails to download') + return ret multiple = write_all and len(thumbnails) > 1 if thumb_filename_base is None: