X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/0c14d66ad9ce1c517fd3fab09a96a16724d3d2ab..7e88d7d78f452ea69f06bbdf23f82e9ad7c3de5e:/yt_dlp/YoutubeDL.py

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 33f33ddfe..4162727c4 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1,8 +1,4 @@
 #!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import absolute_import, unicode_literals
-
 import collections
 import contextlib
 import datetime
@@ -16,6 +12,7 @@
 import operator
 import os
 import platform
+import random
 import re
 import shutil
 import subprocess
@@ -24,94 +21,114 @@
 import time
 import tokenize
 import traceback
-import random
 import unicodedata
-
-from enum import Enum
+import urllib.request
 from string import ascii_letters
 
+from .cache import Cache
 from .compat import (
-    compat_basestring,
-    compat_brotli,
+    HAS_LEGACY as compat_has_legacy,
     compat_get_terminal_size,
-    compat_kwargs,
-    compat_numeric_types,
     compat_os_name,
-    compat_pycrypto_AES,
     compat_shlex_quote,
     compat_str,
-    compat_tokenize_tokenize,
     compat_urllib_error,
     compat_urllib_request,
-    compat_urllib_request_DataHandler,
-    windows_enable_vt_mode,
 )
 from .cookies import load_cookies
+from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
+from .downloader.rtmp import rtmpdump_version
+from .extractor import gen_extractor_classes, get_info_extractor
+from .extractor.openload import PhantomJSwrapper
+from .minicurses import format_text
+from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
+from .postprocessor import (
+    EmbedThumbnailPP,
+    FFmpegFixupDuplicateMoovPP,
+    FFmpegFixupDurationPP,
+    FFmpegFixupM3u8PP,
+    FFmpegFixupM4aPP,
+    FFmpegFixupStretchedPP,
+    FFmpegFixupTimestampPP,
+    FFmpegMergerPP,
+    FFmpegPostProcessor,
+    MoveFilesAfterDownloadPP,
+    get_postprocessor,
+)
+from .update import detect_variant
 from .utils import (
+    DEFAULT_OUTTMPL,
+    LINK_TEMPLATES,
+    NO_DEFAULT,
+    NUMBER_RE,
+    OUTTMPL_TYPES,
+    POSTPROCESS_WHEN,
+    STR_FORMAT_RE_TMPL,
+    STR_FORMAT_TYPES,
+    ContentTooShortError,
+    DateRange,
+    DownloadCancelled,
+    DownloadError,
+    EntryNotInPlaylist,
+    ExistingVideoReached,
+    ExtractorError,
+    GeoRestrictedError,
+    HEADRequest,
+    ISO3166Utils,
+    LazyList,
+    MaxDownloadsReached,
+    Namespace,
+    PagedList,
+    PerRequestProxyHandler,
+    PlaylistEntries,
+    Popen,
+    PostProcessingError,
+    ReExtractInfo,
+    RejectedVideoReached,
+    SameFileError,
+    UnavailableVideoError,
+    YoutubeDLCookieProcessor,
+    YoutubeDLHandler,
+    YoutubeDLRedirectHandler,
     age_restricted,
     args_to_str,
-    ContentTooShortError,
     date_from_str,
-    DateRange,
-    DEFAULT_OUTTMPL,
     determine_ext,
     determine_protocol,
-    DownloadCancelled,
-    DownloadError,
     encode_compat_str,
     encodeFilename,
-    EntryNotInPlaylist,
     error_to_compat_str,
-    ExistingVideoReached,
     expand_path,
-    ExtractorError,
+    filter_dict,
     float_or_none,
     format_bytes,
-    format_field,
     format_decimal_suffix,
+    format_field,
     formatSeconds,
-    GeoRestrictedError,
     get_domain,
-    HEADRequest,
-    InAdvancePagedList,
     int_or_none,
     iri_to_uri,
-    ISO3166Utils,
     join_nonempty,
-    LazyList,
-    LINK_TEMPLATES,
     locked_file,
     make_dir,
     make_HTTPS_handler,
-    MaxDownloadsReached,
     merge_headers,
     network_exceptions,
     number_of_digits,
     orderedSet,
-    OUTTMPL_TYPES,
-    PagedList,
     parse_filesize,
-    PerRequestProxyHandler,
     platform_name,
-    Popen,
-    POSTPROCESS_WHEN,
-    PostProcessingError,
     preferredencoding,
     prepend_extension,
-    ReExtractInfo,
     register_socks_protocols,
-    RejectedVideoReached,
     remove_terminal_sequences,
     render_table,
     replace_extension,
-    SameFileError,
     sanitize_filename,
     sanitize_path,
     sanitize_url,
     sanitized_Request,
     std_headers,
-    STR_FORMAT_RE_TMPL,
-    STR_FORMAT_TYPES,
     str_or_none,
     strftime_or_none,
     subtitles_filename,
@@ -120,53 +137,20 @@
     to_high_limit_path,
     traverse_obj,
     try_get,
-    UnavailableVideoError,
     url_basename,
     variadic,
     version_tuple,
+    windows_enable_vt_mode,
     write_json_file,
     write_string,
-    YoutubeDLCookieProcessor,
-    YoutubeDLHandler,
-    YoutubeDLRedirectHandler,
-)
-from .cache import Cache
-from .minicurses import format_text
-from .extractor import (
-    gen_extractor_classes,
-    get_info_extractor,
-    _LAZY_LOADER,
-    _PLUGIN_CLASSES as plugin_extractors
-)
-from .extractor.openload import PhantomJSwrapper
-from .downloader import (
-    FFmpegFD,
-    get_suitable_downloader,
-    shorten_protocol_name
-)
-from .downloader.rtmp import rtmpdump_version
-from .postprocessor import (
-    get_postprocessor,
-    EmbedThumbnailPP,
-    FFmpegFixupDuplicateMoovPP,
-    FFmpegFixupDurationPP,
-    FFmpegFixupM3u8PP,
-    FFmpegFixupM4aPP,
-    FFmpegFixupStretchedPP,
-    FFmpegFixupTimestampPP,
-    FFmpegMergerPP,
-    FFmpegPostProcessor,
-    MoveFilesAfterDownloadPP,
-    _PLUGIN_CLASSES as plugin_postprocessors
-)
-from .update import detect_variant
-from .version import __version__, RELEASE_GIT_HEAD
+from .version import RELEASE_GIT_HEAD, __version__
 
 if compat_os_name == 'nt':
     import ctypes
 
 
-class YoutubeDL(object):
+class YoutubeDL:
     """YoutubeDL class.
 
     YoutubeDL objects are the ones responsible of downloading the
@@ -209,13 +193,6 @@ class YoutubeDL(object):
                        For compatibility, a single list is also accepted
     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                        a list of tuples with (template, filename)
-    forceurl:          Force printing final URL. (Deprecated)
-    forcetitle:        Force printing title. (Deprecated)
-    forceid:           Force printing ID. (Deprecated)
-    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
-    forcedescription:  Force printing description. (Deprecated)
-    forcefilename:     Force printing final filename. (Deprecated)
-    forceduration:     Force printing duration. (Deprecated)
     forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                        (or video) as a single JSON line.
@@ -292,9 +269,6 @@
     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatically generated subtitles to a file
-    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
-                       Downloads all the subtitles of the video
-                       (requires writesubtitles or writeautomaticsub)
     listsubtitles:     Lists all available subtitles for the video
     subtitlesformat:   The format code for subtitles
     subtitleslangs:    List of languages of the subtitles to download (can be regex).
@@ -327,13 +301,17 @@
                        has been filtered out.
     break_per_url:     Whether break_on_reject and break_on_existing
                        should act on each input URL as opposed to for the entire queue
-    cookiefile:        File name where cookies should be read from and dumped to
+    cookiefile:        File name or text stream from where cookies should be read and dumped to
     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
                        name/path from where cookies are loaded, and the name of the
                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                        support RFC 5746 secure renegotiation
     nocheckcertificate:  Do not verify SSL certificates
+    client_certificate:  Path to client certificate file in PEM format. May include the private key
+    client_certificate_key:  Path to private key file for client certificate
+    client_certificate_password:  Password for client certificate private key, if encrypted.
+                       If not provided and the key is encrypted, yt-dlp will ask interactively
     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                        At the moment, this is only supported by YouTube.
     http_headers:      A dictionary of custom headers to be used for all requests
@@ -344,7 +322,6 @@
     bidi_workaround:   Work around buggy terminals without bidirectional text
                        support, using fridibi
     debug_printtraffic:Print out sent and received HTTP traffic
-    include_ads:       Download ads as well (deprecated)
     default_search:    Prepend this string if an input url is not valid.
                        'auto' for elaborate guessing
     encoding:          Use this encoding instead of the system-specified.
@@ -360,10 +337,6 @@
                        * when: When to run the postprocessor. Allowed values are
                          the entries of utils.POSTPROCESS_WHEN
                          Assumed to be 'post_process' if not given
-    post_hooks:        Deprecated - Register a custom postprocessor instead
-                       A list of functions that get called as the final step
-                       for each video file, after all postprocessors have been
-                       called. The filename will be passed as the only argument.
     progress_hooks:    A list of functions that get called on download
                        progress, with a dictionary with the entries
                        * status: One of "downloading", "error", or "finished".
@@ -408,8 +381,6 @@
                        - "detect_or_warn": check whether we can do anything
                                            about it, warn otherwise (default)
     source_address:    Client-side IP address to bind to.
-    call_home:         Boolean, true iff we are allowed to contact the
-                       yt-dlp servers for debugging. (BROKEN)
     sleep_interval_requests: Number of seconds to sleep between requests
                        during extraction
     sleep_interval:    Number of seconds to sleep before each download when
@@ -425,10 +396,14 @@
     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
     listformats:       Print an overview of available video formats and exit.
     list_thumbnails:   Print a table of all thumbnails and exit.
-    match_filter:      A function that gets called with the info_dict of
-                       every video.
-                       If it returns a message, the video is ignored.
-                       If it returns None, the video is downloaded.
+    match_filter:      A function that gets called for every video with the signature
+                       (info_dict, *, incomplete: bool) -> Optional[str]
+                       For backward compatibility with youtube-dl, the signature
+                       (info_dict) -> Optional[str] is also allowed.
+                       - If it returns a message, the video is ignored.
+                       - If it returns None, the video is downloaded.
+                       - If it returns utils.NO_DEFAULT, the user is interactively
+                         asked whether to download the video.
                        match_filter_func in utils.py is one example for this.
     no_color:          Do not emit color codes in output.
     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
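
For illustration, a minimal sketch of a callable satisfying the match_filter contract described above (the function name and the 60-second threshold are hypothetical, not part of this patch):

    from yt_dlp.utils import NO_DEFAULT

    def short_video_filter(info_dict, *, incomplete=False):
        """Skip videos under 60s; prompt interactively when duration is unknown."""
        duration = info_dict.get('duration')
        if duration is None:
            # NO_DEFAULT makes yt-dlp ask the user whether to download
            return None if incomplete else NO_DEFAULT
        if duration < 60:
            return 'Video is shorter than 60 seconds -- skipping'
        return None  # None means: download the video

    # Usage: YoutubeDL({'match_filter': short_video_filter})
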
@@ -440,17 +415,10 @@
     geo_bypass_ip_block:
                        IP range in CIDR notation that will be used similarly to
                        geo_bypass_country
-
-    The following options determine which downloader is picked:
     external_downloader: A dictionary of protocol keys and the executable of the
                        external downloader to use for it.
                        The allowed protocols are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                        Set the value to 'native' to use the native downloader
-    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
-                       or {'m3u8': 'ffmpeg'} instead.
-                       Use the native HLS downloader instead of ffmpeg/avconv
-                       if True, otherwise use ffmpeg/avconv if False, otherwise
-                       use downloader suggested by extractor if None.
     compat_opts:       Compatibility options. See "Differences in default behavior".
                        The following options do not work when used through the API:
                        filename, abort-on-error, multistreams, no-live-chat, format-sort
@@ -460,6 +428,16 @@
                        Allowed keys are 'download', 'postprocess',
                        'download-title' (console title) and 'postprocess-title'.
                        The template is mapped on a dictionary with keys 'progress' and 'info'
+    retry_sleep_functions: Dictionary of functions that takes the number of attempts
+                       as argument and returns the time to sleep in seconds.
+                       Allowed keys are 'http', 'fragment', 'file_access'
+    download_ranges:   A function that gets called for every video with the signature
+                       (info_dict, *, ydl) -> Iterable[Section].
+                       Only the returned sections will be downloaded. Each Section contains:
+                       * start_time: Start time of the section in seconds
+                       * end_time: End time of the section in seconds
+                       * title: Section title (Optional)
+                       * index: Section number (Optional)
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the downloader (see yt_dlp/downloader/common.py):
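
A minimal usage sketch for the two options added above (the sleep curve and the 30-second range are illustrative; the section dicts use the keys that process_video_result consumes later in this patch):

    from yt_dlp import YoutubeDL

    ydl = YoutubeDL({
        # attempts -> seconds: exponential backoff for plain HTTP retries
        'retry_sleep_functions': {'http': lambda n: 2 ** n},
        # download only the first 30 seconds of every video
        'download_ranges': lambda info_dict, *, ydl: [
            {'start_time': 0, 'end_time': 30, 'title': 'intro', 'index': 1}],
    })
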
@@ -469,8 +447,6 @@
     external_downloader_args, concurrent_fragment_downloads.
 
     The following options are used by the post processors:
-    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
-                       otherwise prefer ffmpeg. (avconv support is deprecated)
     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                        to the binary or its containing directory.
     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
@@ -490,19 +466,55 @@
                        See "EXTRACTOR ARGUMENTS" for details.
                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
-    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
+
+    The following options are deprecated and may be removed in the future:
+
+    forceurl:          - Use forceprint
+                       Force printing final URL.
+    forcetitle:        - Use forceprint
+                       Force printing title.
+    forceid:           - Use forceprint
+                       Force printing ID.
+    forcethumbnail:    - Use forceprint
+                       Force printing thumbnail URL.
+    forcedescription:  - Use forceprint
+                       Force printing description.
+    forcefilename:     - Use forceprint
+                       Force printing final filename.
+    forceduration:     - Use forceprint
+                       Force printing duration.
+    allsubtitles:      - Use subtitleslangs = ['all']
+                       Downloads all the subtitles of the video
+                       (requires writesubtitles or writeautomaticsub)
+    include_ads:       - Doesn't work
+                       Download ads as well
+    call_home:         - Not implemented
+                       Boolean, true iff we are allowed to contact the
+                       yt-dlp servers for debugging.
+    post_hooks:        - Register a custom postprocessor
+                       A list of functions that get called as the final step
+                       for each video file, after all postprocessors have been
+                       called. The filename will be passed as the only argument.
+    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
+                       Use the native HLS downloader instead of ffmpeg/avconv
+                       if True, otherwise use ffmpeg/avconv if False, otherwise
+                       use downloader suggested by extractor if None.
+    prefer_ffmpeg:     - avconv support is deprecated
+                       If False, use avconv instead of ffmpeg if both are available,
+                       otherwise prefer ffmpeg.
+    youtube_include_dash_manifest: - Use extractor_args
                        If True (default), DASH manifests and related data will be downloaded
                        and processed by extractor.
                        You can reduce network I/O by disabling it if you don't care about DASH.
                        (only for youtube)
-    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
+    youtube_include_hls_manifest: - Use extractor_args
                        If True (default), HLS manifests and related data will be downloaded
                        and processed by extractor.
                        You can reduce network I/O by disabling it if you don't care about HLS.
                        (only for youtube)
     """
 
-    _NUMERIC_FIELDS = set((
+    _NUMERIC_FIELDS = {
         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
         'timestamp', 'release_timestamp',
         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
@@ -510,8 +522,18 @@
         'start_time', 'end_time',
         'chapter_number', 'season_number', 'episode_number',
         'track_number', 'disc_number', 'release_year',
-    ))
+    }
 
+    _format_fields = {
+        # NB: Keep in sync with the docstring of extractor/common.py
+        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
+        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
+        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
+        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
+        'preference', 'language', 'language_preference', 'quality', 'source_preference',
+        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
+        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
+    }
     _format_selection_exts = {
         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
         'video': {'mp4', 'flv', 'webm', '3gp'},
@@ -542,17 +564,18 @@ def __init__(self, params=None, auto_init=True):
         self.cache = Cache(self)
 
         windows_enable_vt_mode()
-        self._out_files = {
-            'error': sys.stderr,
-            'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
-            'console': None if compat_os_name == 'nt' else next(
+        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
+        self._out_files = Namespace(
+            out=stdout,
+            error=sys.stderr,
+            screen=sys.stderr if self.params.get('quiet') else stdout,
+            console=None if compat_os_name == 'nt' else next(
                 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
-        }
-        self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
-        self._allow_colors = {
-            type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
-            for type_ in ('screen', 'error')
-        }
+        )
+        self._allow_colors = Namespace(**{
+            type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
+            for type_, stream in self._out_files.items_ if type_ != 'console'
+        })
 
         if sys.version_info < (3, 6):
             self.report_warning(
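
The dict-based _out_files/_allow_colors bookkeeping is replaced by utils.Namespace. A rough sketch of the semantics the new code relies on (attribute access plus an items_ view), assuming Namespace behaves as it is used in this hunk:

    from yt_dlp.utils import Namespace

    outs = Namespace(out='<stdout>', error='<stderr>')
    assert outs.error == '<stderr>'             # attribute access, as in self._out_files.out
    names = [name for name, _ in outs.items_]   # (name, value) iteration, as in __init__ above
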
@@ -567,7 +590,7 @@ def __init__(self, params=None, auto_init=True):
 
         def check_deprecated(param, option, suggestion):
             if self.params.get(param) is not None:
-                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
+                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                 return True
             return False
 
@@ -584,7 +607,10 @@ def check_deprecated(param, option, suggestion):
         for msg in self.params.get('_deprecation_warnings', []):
             self.deprecation_warning(msg)
 
-        if 'list-formats' in self.params.get('compat_opts', []):
+        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
+        if not compat_has_legacy:
+            self.params['compat_opts'].add('no-compat-legacy')
+        if 'list-formats' in self.params['compat_opts']:
             self.params['listformats_table'] = False
 
         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
@@ -609,14 +635,8 @@ def check_deprecated(param, option, suggestion):
                 import pty
                 master, slave = pty.openpty()
                 width = compat_get_terminal_size().columns
-                if width is None:
-                    width_args = []
-                else:
-                    width_args = ['-w', str(width)]
-                sp_kwargs = dict(
-                    stdin=subprocess.PIPE,
-                    stdout=slave,
-                    stderr=self._out_files['error'])
+                width_args = [] if width is None else ['-w', str(width)]
+                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                 try:
                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                 except OSError:
@@ -630,6 +650,11 @@ def check_deprecated(param, option, suggestion):
             else:
                 raise
 
+        if auto_init:
+            if auto_init != 'no_verbose_header':
+                self.print_debug_header()
+            self.add_default_info_extractors()
+
         if (sys.platform != 'win32'
                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                 and not self.params.get('restrictfilenames', False)):
@@ -651,13 +676,6 @@ def check_deprecated(param, option, suggestion):
         # Set http_headers defaults according to std_headers
         self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
 
-        self._setup_opener()
-
-        if auto_init:
-            if auto_init != 'no_verbose_header':
-                self.print_debug_header()
-            self.add_default_info_extractors()
-
         hooks = {
             'post_hooks': self.add_post_hook,
             'progress_hooks': self.add_progress_hook,
@@ -671,9 +689,10 @@ def check_deprecated(param, option, suggestion):
             pp_def = dict(pp_def_raw)
             when = pp_def.pop('when', 'post_process')
             self.add_post_processor(
-                get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
+                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                 when=when)
 
+        self._setup_opener()
         register_socks_protocols()
 
         def preload_download_archive(fn):
@@ -685,7 +704,7 @@ def preload_download_archive(fn):
                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                     for line in archive_file:
                         self.archive.add(line.strip())
-            except IOError as ioe:
+            except OSError as ioe:
                 if ioe.errno != errno.ENOENT:
                     raise
                 return False
@@ -771,9 +790,9 @@ def _bidi_workaround(self, message):
         assert hasattr(self, '_output_process')
         assert isinstance(message, compat_str)
         line_count = message.count('\n') + 1
-        self._output_process.stdin.write((message + '\n').encode('utf-8'))
+        self._output_process.stdin.write((message + '\n').encode())
         self._output_process.stdin.flush()
-        res = ''.join(self._output_channel.readline().decode('utf-8')
+        res = ''.join(self._output_channel.readline().decode()
                       for _ in range(line_count))
         return res[:-len('\n')]
 
@@ -787,10 +806,10 @@ def _write_string(self, message, out=None, only_once=False):
     def to_stdout(self, message, skip_eol=False, quiet=None):
         """Print message to stdout"""
         if quiet is not None:
-            self.deprecation_warning('"ydl.to_stdout" no longer accepts the argument quiet. Use "ydl.to_screen" instead')
-        self._write_string(
-            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
-            self._out_files['print'])
+            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
+        if skip_eol is not False:
+            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
+        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
 
     def to_screen(self, message, skip_eol=False, quiet=None):
         """Print message to screen if not in quiet mode"""
@@ -801,7 +820,7 @@ def to_screen(self, message, skip_eol=False, quiet=None):
             return
         self._write_string(
             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
-            self._out_files['screen'])
+            self._out_files.screen)
 
     def to_stderr(self, message, only_once=False):
         """Print message to stderr"""
@@ -809,12 +828,12 @@ def to_stderr(self, message, only_once=False):
         if self.params.get('logger'):
             self.params['logger'].error(message)
         else:
-            self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
+            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
 
     def _send_console_code(self, code):
-        if compat_os_name == 'nt' or not self._out_files['console']:
+        if compat_os_name == 'nt' or not self._out_files.console:
             return
-        self._write_string(code, self._out_files['console'])
+        self._write_string(code, self._out_files.console)
 
     def to_console_title(self, message):
         if not self.params.get('consoletitle', False):
@@ -882,16 +901,19 @@ def trouble(self, message=None, tb=None, is_error=True):
             raise DownloadError(message, exc_info)
         self._download_retcode = 1
 
-    class Styles(Enum):
-        HEADERS = 'yellow'
-        EMPHASIS = 'light blue'
-        ID = 'green'
-        DELIM = 'blue'
-        ERROR = 'red'
-        WARNING = 'yellow'
-        SUPPRESS = 'light black'
+    Styles = Namespace(
+        HEADERS='yellow',
+        EMPHASIS='light blue',
+        FILENAME='green',
+        ID='green',
+        DELIM='blue',
+        ERROR='red',
+        WARNING='yellow',
+        SUPPRESS='light black',
+    )
 
     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
+        text = str(text)
         if test_encoding:
             original_text = text
             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
@@ -899,17 +921,16 @@ def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_enc
             text = text.encode(encoding, 'ignore').decode(encoding)
             if fallback is not None and text != original_text:
                 text = fallback
-        if isinstance(f, self.Styles):
-            f = f.value
         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 
+    def _format_out(self, *args, **kwargs):
+        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
+
     def _format_screen(self, *args, **kwargs):
-        return self._format_text(
-            self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
+        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
 
     def _format_err(self, *args, **kwargs):
-        return self._format_text(
-            self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
+        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
 
     def report_warning(self, message, only_once=False):
         '''
@@ -925,7 +946,7 @@ def report_warning(self, message, only_once=False):
 
     def deprecation_warning(self, message):
         if self.params.get('logger') is not None:
-            self.params['logger'].warning('DeprecationWarning: {message}')
+            self.params['logger'].warning(f'DeprecationWarning: {message}')
         else:
            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 
@@ -940,7 +961,7 @@ def write_debug(self, message, only_once=False):
         '''Log debug message or Print message to stderr'''
         if not self.params.get('verbose', False):
             return
-        message = '[debug] %s' % message
+        message = f'[debug] {message}'
         if self.params.get('logger'):
             self.params['logger'].debug(message)
         else:
@@ -961,7 +982,7 @@ def report_file_delete(self, file_name):
             self.to_screen('Deleting existing file')
 
     def raise_no_formats(self, info, forced=False, *, msg=None):
-        has_drm = info.get('__has_drm')
+        has_drm = info.get('_has_drm')
         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
         if forced or not ignored:
@@ -982,11 +1003,9 @@ def parse_outtmpl(self):
         outtmpl_dict.update({
             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
             if outtmpl_dict.get(k) is None})
-        for key, val in outtmpl_dict.items():
+        for _, val in outtmpl_dict.items():
             if isinstance(val, bytes):
-                self.report_warning(
-                    'Parameter outtmpl is bytes, but should be a unicode string. '
-                    'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
+                self.report_warning('Parameter outtmpl is bytes, but should be a unicode string')
         return outtmpl_dict
 
     def get_output_path(self, dir_type='', filename=None):
@@ -996,12 +1015,6 @@ def get_output_path(self, dir_type='', filename=None):
             expand_path(paths.get('home', '').strip()),
             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
             filename or '')
-
-        # Temporary fix for #4787
-        # 'Treat' all problem characters by passing filename through preferredencoding
-        # to workaround encoding issues with subprocess on python2 @ Windows
-        if sys.version_info < (3, 0) and sys.platform == 'win32':
-            path = encodeFilename(path, True).decode(preferredencoding())
         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 
     @staticmethod
@@ -1011,7 +1024,7 @@ def _outtmpl_expandpath(outtmpl):
         # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
-        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
 
         # outtmpl should be expand_path'ed before template dict substitution
         # because meta fields may contain env variables we don't want to
@@ -1044,6 +1057,7 @@ def validate_outtmpl(cls, outtmpl):
     def _copy_infodict(info_dict):
         info_dict = dict(info_dict)
         info_dict.pop('__postprocessors', None)
+        info_dict.pop('__pending_error', None)
         return info_dict
 
     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
@@ -1059,7 +1073,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
             if info_dict.get('duration', None) is not None
             else None)
-        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
         info_dict['video_autonumber'] = self._num_videos
         if info_dict.get('resolution') is None:
             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
@@ -1067,7 +1081,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
         # of %(field)s to %(field)0Nd for backward compatibility
         field_size_compat_map = {
-            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
+            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
             'autonumber': self.params.get('autonumber_size') or 5,
         }
@@ -1081,17 +1095,18 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
         # Field is of the form key1.key2...
         # where keys (except first) can be string, int or slice
         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
-        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
+        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
-        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
+        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
             (?P<negate>-)?
-            (?P<fields>{field})
-            (?P<maths>(?:{math_op}{math_field})*)
+            (?P<fields>{FIELD_RE})
+            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
             (?:>(?P<strf_format>.+?))?
-            (?P<alternate>(?<!\\),[^|&)]+)?
-            (?:&(?P<replacement>.*?))?
-            (?:\|(?P<default>.*?))?
-            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
+            (?P<remaining>
+                (?P<alternate>(?<!\\),[^|&)]+)?
+                (?:&(?P<replacement>.*?))?
+                (?:\|(?P<default>.*?))?
+            )$''')
 
         def _traverse_infodict(k):
             k = k.split('.')
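
The alternate and default groups in the regex above implement the ',' fallback-field and '|' literal-default syntax of output templates. A small illustration using evaluate_outtmpl, with made-up values:

    from yt_dlp import YoutubeDL

    ydl = YoutubeDL()
    info = {'id': 'abc123', 'ext': 'mp4'}                  # note: no 'title' key
    ydl.evaluate_outtmpl('%(title,id)s.%(ext)s', info)     # -> 'abc123.mp4' (alternate field)
    ydl.evaluate_outtmpl('%(uploader|unknown)s', info)     # -> 'unknown'    (literal default)
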
@@ -1138,8 +1153,10 @@ def get_value(mdict):
         na = self.params.get('outtmpl_na_placeholder', 'NA')
 
         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
-            return sanitize_filename(str(value), restricted=restricted,
-                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))
+            return sanitize_filename(str(value), restricted=restricted, is_id=(
+                bool(re.search(r'(^|[_.])id(\.|$)', key))
+                if 'filename-sanitization' in self.params['compat_opts']
+                else NO_DEFAULT))
 
         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
         sanitize = bool(sanitize)
@@ -1162,13 +1179,13 @@ def create_key(outer_mobj):
                 value = get_value(mobj)
                 replacement = mobj['replacement']
                 if value is None and mobj['alternate']:
-                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
+                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                 else:
                     break
 
             fmt = outer_mobj.group('format')
             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
-                fmt = '0{:d}d'.format(field_size_compat_map[key])
+                fmt = f'0{field_size_compat_map[key]:d}d'
 
             value = default if value is None else value if replacement is None else replacement
 
@@ -1183,7 +1200,7 @@ def create_key(outer_mobj):
                 value = map(str, variadic(value) if '#' in flags else [value])
                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
             elif fmt[-1] == 'B':  # bytes
-                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
+                value = f'%{str_fmt}'.encode() % str(value).encode()
                 value, fmt = value.decode('utf-8', 'ignore'), 's'
             elif fmt[-1] == 'U':  # unicode normalized
                 value, fmt = unicodedata.normalize(
@@ -1224,18 +1241,21 @@ def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
         return self.escape_outtmpl(outtmpl) % info_dict
 
-    def _prepare_filename(self, info_dict, tmpl_type='default'):
+    def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
+        assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
+        if outtmpl is None:
+            outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
         try:
-            outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
+            outtmpl = self._outtmpl_expandpath(outtmpl)
             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
             if not filename:
                 return None
 
-            if tmpl_type in ('default', 'temp'):
+            if tmpl_type in ('', 'temp'):
                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                     filename = replace_extension(filename, ext, final_ext)
-            else:
+            elif tmpl_type:
                 force_ext = OUTTMPL_TYPES[tmpl_type]
                 if force_ext:
                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
@@ -1251,10 +1271,12 @@ def _prepare_filename(self, info_dict, tmpl_type='default'):
             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
             return None
 
-    def prepare_filename(self, info_dict, dir_type='', warn=False):
-        """Generate the output filename."""
-
-        filename = self._prepare_filename(info_dict, dir_type or 'default')
+    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
+        """Generate the output filename"""
+        if outtmpl:
+            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
+            dir_type = None
+        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
         if not filename and dir_type not in ('', 'temp'):
             return ''
 
@@ -1291,7 +1313,7 @@ def check_filter():
             if date is not None:
                 dateRange = self.params.get('daterange', DateRange())
                 if date not in dateRange:
-                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+                    return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
             view_count = info_dict.get('view_count')
             if view_count is not None:
                 min_views = self.params.get('min_views')
@@ -1310,7 +1332,16 @@ def check_filter():
                 except TypeError:
                     # For backward compatibility
                     ret = None if incomplete else match_filter(info_dict)
-                if ret is not None:
+                if ret is NO_DEFAULT:
+                    while True:
+                        filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
+                        reply = input(self._format_screen(
+                            f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
+                        if reply in {'y', ''}:
+                            return None
+                        elif reply == 'n':
+                            return f'Skipping {video_title}'
+                elif ret is not None:
                     return ret
             return None
 
@@ -1379,7 +1410,7 @@ def extract_info(self, url, download=True, ie_key=None, extra_info=None,
             else:
                 self.report_error('no suitable InfoExtractor for URL %s' % url)
 
-    def __handle_extraction_exceptions(func):
+    def _handle_extraction_exceptions(func):
         @functools.wraps(func)
         def wrapper(self, *args, **kwargs):
             while True:
@@ -1452,7 +1483,7 @@ def progress(msg):
                     self.to_screen('')
             raise
 
-    @__handle_extraction_exceptions
+    @_handle_extraction_exceptions
     def __extract_info(self, url, ie, download, extra_info, process):
         ie_result = ie.extract(url)
         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
@@ -1518,6 +1549,7 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
                 self.add_extra_info(info_copy, extra_info)
                 info_copy, _ = self.pre_process(info_copy)
                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
+                self._raise_pending_errors(info_copy)
                 if self.params.get('force_write_download_archive', False):
                     self.record_download_archive(info_copy)
                 return ie_result
@@ -1525,6 +1557,7 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
         if result_type == 'video':
             self.add_extra_info(ie_result, extra_info)
             ie_result = self.process_video_result(ie_result, download=download)
+            self._raise_pending_errors(ie_result)
            additional_urls = (ie_result or {}).get('additional_urls')
            if additional_urls:
                 # TODO: Improve MetadataParserPP to allow setting a list
@@ -1559,13 +1592,9 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
             if not info:
                 return info
 
-            force_properties = dict(
-                (k, v) for k, v in ie_result.items() if v is not None)
-            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
-                if f in force_properties:
-                    del force_properties[f]
             new_result = info.copy()
-            new_result.update(force_properties)
+            new_result.update(filter_dict(ie_result, lambda k, v: (
+                v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
 
             # Extracted info may not be a video result (i.e.
             # info.get('_type', 'video') != video) but rather an url or
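
filter_dict, newly used here, keeps only the entries whose (key, value) pair passes the predicate -- a quick sketch of the behavior this call assumes:

    from yt_dlp.utils import filter_dict

    ie_result = {'_type': 'url', 'url': 'https://example.com', 'title': 'A title', 'like_count': None}
    filter_dict(ie_result, lambda k, v: v is not None and k not in {'_type', 'url'})
    # -> {'title': 'A title'}
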
@@ -1637,104 +1666,14 @@ def _playlist_infodict(ie_result, **kwargs):
         }
 
     def __process_playlist(self, ie_result, download):
-        # We process each entry in the playlist
-        playlist = ie_result.get('title') or ie_result.get('id')
-        self.to_screen('[download] Downloading playlist: %s' % playlist)
-
-        if 'entries' not in ie_result:
-            raise EntryNotInPlaylist('There are no entries')
-
-        MissingEntry = object()
-        incomplete_entries = bool(ie_result.get('requested_entries'))
-        if incomplete_entries:
-            def fill_missing_entries(entries, indices):
-                ret = [MissingEntry] * max(indices)
-                for i, entry in zip(indices, entries):
-                    ret[i - 1] = entry
-                return ret
-            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
-
-        playlist_results = []
-
-        playliststart = self.params.get('playliststart', 1)
-        playlistend = self.params.get('playlistend')
-        # For backwards compatibility, interpret -1 as whole list
-        if playlistend == -1:
-            playlistend = None
-
-        playlistitems_str = self.params.get('playlist_items')
-        playlistitems = None
-        if playlistitems_str is not None:
-            def iter_playlistitems(format):
-                for string_segment in format.split(','):
-                    if '-' in string_segment:
-                        start, end = string_segment.split('-')
-                        for item in range(int(start), int(end) + 1):
-                            yield int(item)
-                    else:
-                        yield int(string_segment)
-            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
+        """Process each entry in the playlist"""
+        title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
+        self.to_screen(f'[download] Downloading playlist: {title}')
 
-        ie_entries = ie_result['entries']
-        if isinstance(ie_entries, list):
-            playlist_count = len(ie_entries)
-            msg = f'Collected {playlist_count} videos; downloading %d of them'
-            ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
-
-            def get_entry(i):
-                return ie_entries[i - 1]
-        else:
-            msg = 'Downloading %d videos'
-            if not isinstance(ie_entries, (PagedList, LazyList)):
-                ie_entries = LazyList(ie_entries)
-            elif isinstance(ie_entries, InAdvancePagedList):
-                if ie_entries._pagesize == 1:
-                    playlist_count = ie_entries._pagecount
-
-            def get_entry(i):
-                return YoutubeDL.__handle_extraction_exceptions(
-                    lambda self, i: ie_entries[i - 1]
-                )(self, i)
-
-        entries, broken = [], False
-        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
-        for i in items:
-            if i == 0:
-                continue
-            if playlistitems is None and playlistend is not None and playlistend < i:
-                break
-            entry = None
-            try:
-                entry = get_entry(i)
-                if entry is MissingEntry:
-                    raise EntryNotInPlaylist()
-            except (IndexError, EntryNotInPlaylist):
-                if incomplete_entries:
-                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
-                elif not playlistitems:
-                    break
-            entries.append(entry)
-            try:
-                if entry is not None:
-                    self._match_entry(entry, incomplete=True, silent=True)
-            except (ExistingVideoReached, RejectedVideoReached):
-                broken = True
-                break
-        ie_result['entries'] = entries
-
-        # Save playlist_index before re-ordering
-        entries = [
-            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
-            for i, entry in enumerate(entries, 1)
-            if entry is not None]
-        n_entries = len(entries)
-
-        if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
-            ie_result['playlist_count'] = n_entries
-
-        if not playlistitems and (playliststart != 1 or playlistend):
-            playlistitems = list(range(playliststart, playliststart + n_entries))
-        ie_result['requested_entries'] = playlistitems
+        all_entries = PlaylistEntries(self, ie_result)
+        entries = orderedSet(all_entries.get_requested_items())
+        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])
+        n_entries, ie_result['playlist_count'] = len(entries), all_entries.full_count
 
         _infojson_written = False
         write_playlist_files = self.params.get('allow_playlist_files', True)
@@ -1757,27 +1696,29 @@ def get_entry(i):
         if self.params.get('playlistrandom', False):
             random.shuffle(entries)
 
-        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+        self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
+                       f'{format_field(ie_result, "playlist_count", " of %s")}')
 
-        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
         failures = 0
         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
-        for i, entry_tuple in enumerate(entries, 1):
-            playlist_index, entry = entry_tuple
+        for i, (playlist_index, entry) in enumerate(entries, 1):
+            # TODO: Add auto-generated fields
+            if self._match_entry(entry, incomplete=True) is not None:
+                continue
+
             if 'playlist-index' in self.params.get('compat_opts', []):
-                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
-            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
-            # This __x_forwarded_for_ip thing is a bit ugly but requires
-            # minimal changes
-            if x_forwarded_for:
-                entry['__x_forwarded_for_ip'] = x_forwarded_for
-            extra = {
+                playlist_index = ie_result['requested_entries'][i - 1]
+            self.to_screen('[download] Downloading video %s of %s' % (
+                self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
+
+            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
+            entry_result = self.__process_iterable_entry(entry, download, {
                 'n_entries': n_entries,
-                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
+                '__last_playlist_index': max(ie_result['requested_entries']),
                 'playlist_count': ie_result.get('playlist_count'),
                 'playlist_index': playlist_index,
                 'playlist_autonumber': i,
-                'playlist': playlist,
+                'playlist': title,
                 'playlist_id': ie_result.get('id'),
                 'playlist_title': ie_result.get('title'),
                 'playlist_uploader': ie_result.get('uploader'),
@@ -1787,32 +1728,29 @@ def get_entry(i):
                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
                 'extractor_key': ie_result['extractor_key'],
-            }
-
-            if self._match_entry(entry, incomplete=True) is not None:
-                continue
-
-            entry_result = self.__process_iterable_entry(entry, download, extra)
+            })
             if not entry_result:
                 failures += 1
             if failures >= max_failures:
                 self.report_error(
-                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
+                    f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
                 break
-            playlist_results.append(entry_result)
-        ie_result['entries'] = playlist_results
+            entries[i - 1] = (playlist_index, entry_result)
+
+        # Update with processed data
+        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])
 
         # Write the updated info to json
-        if _infojson_written and self._write_info_json(
+        if _infojson_written is True and self._write_info_json(
                 'updated playlist', ie_result,
                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
             return
 
         ie_result = self.run_all_pps('playlist', ie_result)
-        self.to_screen(f'[download] Finished downloading playlist: {playlist}')
+        self.to_screen(f'[download] Finished downloading playlist: {title}')
         return ie_result
 
-    @__handle_extraction_exceptions
+    @_handle_extraction_exceptions
     def __process_iterable_entry(self, entry, download, extra_info):
         return self.process_ie_result(
             entry, download=download, extra_info=extra_info)
@@ -1894,7 +1832,7 @@ def _check_formats(self, formats):
             temp_file.close()
             try:
                 success, _ = self.dl(temp_file.name, f, test=True)
-            except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
+            except (DownloadError, OSError, ValueError) + network_exceptions:
                 success = False
             finally:
                 if os.path.exists(temp_file.name):
@@ -1918,12 +1856,12 @@ def can_merge():
                 and download
                 and (
                     not can_merge()
-                    or info_dict.get('is_live', False)
+                    or info_dict.get('is_live') and not self.params.get('live_from_start')
                     or self.outtmpl_dict['default'] == '-'))
         compat = (
             prefer_best
             or self.params.get('allow_multiple_audio_streams', False)
-            or 'format-spec' in self.params.get('compat_opts', []))
+            or 'format-spec' in self.params['compat_opts'])
 
         return (
             'best/bestvideo+bestaudio' if prefer_best
@@ -1934,7 +1872,7 @@ def build_format_selector(self, format_spec):
         def syntax_error(note, start):
             message = (
                 'Invalid format specification: '
-                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
+                '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
             return SyntaxError(message)
 
         PICKFIRST = 'PICKFIRST'
@@ -2038,7 +1976,7 @@ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, ins
                         raise syntax_error('Expected a selector', start)
                     current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                 else:
-                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
+                    raise syntax_error(f'Operator not recognized: "{string}"', start)
             elif type == tokenize.ENDMARKER:
                 break
         if current_selector:
@@ -2162,7 +2100,8 @@ def selector_function(ctx):
                     yield from _check_formats(ctx['formats'][::-1])
         elif format_spec == 'mergeall':
             def selector_function(ctx):
-                formats = list(_check_formats(ctx['formats']))
+                formats = list(_check_formats(
+                    f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                 if not formats:
                     return
                 merged_format = formats[-1]
@@ -2171,7 +2110,7 @@ def selector_function(ctx):
                 yield merged_format
 
         else:
-            format_fallback, format_reverse, format_idx = False, True, 1
+            format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
             mobj = re.match(
                 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                 format_spec)
@@ -2198,6 +2137,7 @@ def selector_function(ctx):
                     filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                 elif format_spec in self._format_selection_exts['video']:
                     filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
+                    seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                 elif format_spec in self._format_selection_exts['storyboards']:
                     filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                 else:
@@ -2206,15 +2146,19 @@ def selector_function(ctx):
             def selector_function(ctx):
                 formats = list(ctx['formats'])
                 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
-                if format_fallback and ctx['incomplete_formats'] and not matches:
-                    # for extractors with incomplete formats (audio only (soundcloud)
-                    # or video only (imgur)) best/worst will fallback to
-                    # best/worst {video,audio}-only format
-                    matches = formats
+                if not matches:
+                    if format_fallback and ctx['incomplete_formats']:
+                        # for extractors with incomplete formats (audio only (soundcloud)
+                        # or video only (imgur)) best/worst will fallback to
+                        # best/worst {video,audio}-only format
+                        matches = formats
+                    elif seperate_fallback and not ctx['has_merged_format']:
+                        # for compatibility with youtube-dl when there is no pre-merged format
+                        matches = list(filter(seperate_fallback, formats))
                 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                 try:
                     yield matches[format_idx - 1]
-                except IndexError:
+                except LazyList.IndexError:
                     return
 
         filters = [self._build_format_filter(f) for f in selector.filters]
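
The new seperate_fallback branch restores a youtube-dl compatibility: requesting e.g. -f mp4 from an extractor that only returns split video/audio streams now matches a video-only mp4 instead of erroring out. Roughly, with a hypothetical URL:

    from yt_dlp import YoutubeDL

    # 'mp4' normally needs a pre-merged stream (both vcodec and acodec set);
    # when ctx['has_merged_format'] is False, a video-only mp4 is now accepted
    with YoutubeDL({'format': 'mp4'}) as ydl:
        ydl.download(['https://example.com/watch?v=xyz'])
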
@@ -2226,13 +2170,13 @@ def final_selector(ctx):
                 return selector_function(ctx_copy)
             return final_selector
 
-        stream = io.BytesIO(format_spec.encode('utf-8'))
+        stream = io.BytesIO(format_spec.encode())
         try:
-            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
+            tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
         except tokenize.TokenError:
             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
 
-        class TokenIterator(object):
+        class TokenIterator:
            def __init__(self, tokens):
                 self.tokens = tokens
                 self.counter = 0
@@ -2258,7 +2202,7 @@ def restore_last_token(self):
 
     def _calc_headers(self, info_dict):
         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
-        cookies = self._calc_cookies(info_dict)
+        cookies = self._calc_cookies(info_dict['url'])
         if cookies:
             res['Cookie'] = cookies
 
@@ -2269,8 +2213,8 @@ def _calc_headers(self, info_dict):
 
         return res
 
-    def _calc_cookies(self, info_dict):
-        pr = sanitized_Request(info_dict['url'])
+    def _calc_cookies(self, url):
+        pr = sanitized_Request(url)
         self.cookiejar.add_cookie_header(pr)
         return pr.get_header('Cookie')
 
@@ -2318,13 +2262,17 @@ def _fill_common_fields(self, info_dict, is_video=True):
         # TODO: move sanitization here
         if is_video:
             # playlists are allowed to lack "title"
-            info_dict['fulltitle'] = info_dict.get('title')
-            if 'title' not in info_dict:
+            title = info_dict.get('title', NO_DEFAULT)
+            if title is NO_DEFAULT:
                 raise ExtractorError('Missing "title" field in extractor result',
                                      video_id=info_dict['id'], ie=info_dict['extractor'])
-            elif not info_dict.get('title'):
-                self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
-                info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
+            info_dict['fulltitle'] = title
+            if not title:
+                if title == '':
+                    self.write_debug('Extractor gave empty title. Creating a generic title')
+                else:
+                    self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
+                info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
 
         if info_dict.get('duration') is not None:
             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
@@ -2337,11 +2285,9 @@ def _fill_common_fields(self, info_dict, is_video=True):
             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                 # see http://bugs.python.org/issue1646728)
-                try:
+                with contextlib.suppress(ValueError, OverflowError, OSError):
                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
-                except (ValueError, OverflowError, OSError):
-                    pass
 
         live_keys = ('is_live', 'was_live')
         live_status = info_dict.get('live_status')
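
contextlib.suppress is a drop-in replacement for the removed try/except-pass; the two forms below are equivalent:

    import contextlib

    try:
        value = int('not-a-number')
    except ValueError:
        pass

    with contextlib.suppress(ValueError):
        value = int('not-a-number')
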
' + 'If you want to download from the current time, use --no-live-from-start')) if not formats: self.raise_no_formats(info_dict) @@ -2517,7 +2475,7 @@ def is_wellformed(f): format['dynamic_range'] = 'SDR' if (info_dict.get('duration') and format.get('tbr') and not format.get('filesize') and not format.get('filesize_approx')): - format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8) + format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8)) # Add HTTP headers, so that external programs can use them from the # json output @@ -2540,7 +2498,7 @@ def is_wellformed(f): info_dict, _ = self.pre_process(info_dict) - if self._match_entry(info_dict) is not None: + if self._match_entry(info_dict, incomplete=self._format_fields) is not None: return info_dict self.post_extract(info_dict) @@ -2564,7 +2522,7 @@ def is_wellformed(f): if list_only: # Without this printing, -F --print-json will not work self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True) - return + return info_dict format_selector = self.format_selector if format_selector is None: @@ -2582,33 +2540,15 @@ def is_wellformed(f): self.report_error(err, tb=False, is_error=False) continue - # While in format selection we may need to have an access to the original - # format set in order to calculate some metrics or do some processing. - # For now we need to be able to guess whether original formats provided - # by extractor are incomplete or not (i.e. whether extractor provides only - # video-only or audio-only formats) for proper formats selection for - # extractors with such incomplete formats (see - # https://github.com/ytdl-org/youtube-dl/pull/5556). - # Since formats may be filtered during format selection and may not match - # the original formats the results may be incorrect. Thus original formats - # or pre-calculated metrics should be passed to format selection routines - # as well. - # We will pass a context object containing all necessary additional data - # instead of just formats. - # This fixes incorrect format selection issue (see - # https://github.com/ytdl-org/youtube-dl/issues/10083). - incomplete_formats = ( - # All formats are video-only or - all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) - # all formats are audio-only - or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)) - - ctx = { + formats_to_download = list(format_selector({ 'formats': formats, - 'incomplete_formats': incomplete_formats, - } - - formats_to_download = list(format_selector(ctx)) + 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats), + 'incomplete_formats': ( + # All formats are video-only or + all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) + # all formats are audio-only + or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)), + })) if interactive_format_selection and not formats_to_download: self.report_error('Requested format is not available', tb=False, is_error=False) continue @@ -2616,26 +2556,46 @@ def is_wellformed(f): if not formats_to_download: if not self.params.get('ignore_no_formats_error'): - raise ExtractorError('Requested format is not available', expected=True, - video_id=info_dict['id'], ie=info_dict['extractor']) + raise ExtractorError( + 'Requested format is not available. 
Use --list-formats for a list of available formats', + expected=True, video_id=info_dict['id'], ie=info_dict['extractor']) self.report_warning('Requested format is not available') # Process what we can, even without any available formats. formats_to_download = [{}] - best_format = formats_to_download[-1] + requested_ranges = self.params.get('download_ranges') + if requested_ranges: + requested_ranges = tuple(requested_ranges(info_dict, self)) + + best_format, downloaded_formats = formats_to_download[-1], [] if download: if best_format: - self.to_screen( - f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): ' - + ', '.join([f['format_id'] for f in formats_to_download])) + def to_screen(*msg): + self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}') + + to_screen(f'Downloading {len(formats_to_download)} format(s):', + (f['format_id'] for f in formats_to_download)) + if requested_ranges: + to_screen(f'Downloading {len(requested_ranges)} time ranges:', + (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges)) max_downloads_reached = False - for i, fmt in enumerate(formats_to_download): - formats_to_download[i] = new_info = self._copy_infodict(info_dict) + + for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]): + new_info = self._copy_infodict(info_dict) new_info.update(fmt) + if chapter: + new_info.update({ + 'section_start': chapter.get('start_time'), + 'section_end': chapter.get('end_time', 0), + 'section_title': chapter.get('title'), + 'section_number': chapter.get('index'), + }) + downloaded_formats.append(new_info) try: self.process_info(new_info) except MaxDownloadsReached: max_downloads_reached = True + self._raise_pending_errors(new_info) # Remove copied info for key, val in tuple(new_info.items()): if info_dict.get(key) == val: @@ -2643,12 +2603,12 @@ def is_wellformed(f): if max_downloads_reached: break - write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download) + write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats} assert write_archive.issubset({True, False, 'ignore'}) if True in write_archive and False not in write_archive: self.record_download_archive(info_dict) - info_dict['requested_downloads'] = formats_to_download + info_dict['requested_downloads'] = downloaded_formats info_dict = self.run_all_pps('after_video', info_dict) if max_downloads_reached: raise MaxDownloadsReached() @@ -2659,9 +2619,10 @@ def is_wellformed(f): def process_subtitles(self, video_id, normal_subtitles, automatic_captions): """Select the requested subtitles and their format""" - available_subs = {} + available_subs, normal_sub_langs = {}, [] if normal_subtitles and self.params.get('writesubtitles'): available_subs.update(normal_subtitles) + normal_sub_langs = tuple(normal_subtitles.keys()) if automatic_captions and self.params.get('writeautomaticsub'): for lang, cap_info in automatic_captions.items(): if lang not in available_subs: @@ -2672,7 +2633,7 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions): available_subs): return None - all_sub_langs = available_subs.keys() + all_sub_langs = tuple(available_subs.keys()) if self.params.get('allsubtitles', False): requested_langs = all_sub_langs elif self.params.get('subtitleslangs', False): @@ -2697,10 +2658,10 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions): else: requested_langs.extend(current_langs) requested_langs = 
@@ -2659,9 +2619,10 @@ def is_wellformed(f):

     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
         """Select the requested subtitles and their format"""
-        available_subs = {}
+        available_subs, normal_sub_langs = {}, []
         if normal_subtitles and self.params.get('writesubtitles'):
             available_subs.update(normal_subtitles)
+            normal_sub_langs = tuple(normal_subtitles.keys())
         if automatic_captions and self.params.get('writeautomaticsub'):
             for lang, cap_info in automatic_captions.items():
                 if lang not in available_subs:
@@ -2672,7 +2633,7 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
                 available_subs):
             return None

-        all_sub_langs = available_subs.keys()
+        all_sub_langs = tuple(available_subs.keys())
         if self.params.get('allsubtitles', False):
             requested_langs = all_sub_langs
         elif self.params.get('subtitleslangs', False):
@@ -2697,10 +2658,10 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
             else:
                 requested_langs.extend(current_langs)
             requested_langs = orderedSet(requested_langs)
-        elif 'en' in available_subs:
-            requested_langs = ['en']
+        elif normal_sub_langs:
+            requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
         else:
-            requested_langs = [list(all_sub_langs)[0]]
+            requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]

         if requested_langs:
             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
@@ -2710,7 +2671,7 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
         for lang in requested_langs:
             formats = available_subs.get(lang)
             if formats is None:
-                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+                self.report_warning(f'{lang} subtitles not available for {video_id}')
                 continue
             for ext in formats_preference:
                 if ext == 'best':
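The fallback chain introduced above prefers manually created subtitles over automatic captions. A quick illustration with invented language sets:

    normal_sub_langs = ('de', 'fr')        # manually created subtitles only
    all_sub_langs = ('de', 'fr', 'en')     # 'en' exists only as an automatic caption

    if normal_sub_langs:
        # prefer 'en' among the manual subtitles, else take the first manual language
        requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
    else:
        requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]

    assert tuple(requested_langs) == ('de',)   # the automatic 'en' caption does not win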
@@ -2749,11 +2710,11 @@ def format_tmpl(tmpl):
                 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
-            filename = self.evaluate_outtmpl(file_tmpl, info_dict)
+            filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
             tmpl = format_tmpl(tmpl)
             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
             if self._ensure_dir_exists(filename):
-                with io.open(filename, 'a', encoding='utf-8') as f:
+                with open(filename, 'a', encoding='utf-8') as f:
                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')

     def __forced_printings(self, info_dict, filename, incomplete):
@@ -2823,7 +2784,7 @@ def dl(self, name, info, subtitle=False, test=False):
         urls = '", "'.join(
             (f['url'].split(',')[0] + ',' if f['url'].startswith('data:') else f['url'])
             for f in info.get('requested_formats', []) or [info])
-        self.write_debug('Invoking downloader on "%s"' % urls)
+        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
         # But it may contain objects that are not deep-copyable
@@ -2869,8 +2830,13 @@ def process_info(self, info_dict):
         # Forced printings
         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

+        def check_max_downloads():
+            if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
+                raise MaxDownloadsReached()
+
         if self.params.get('simulate'):
             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
+            check_max_downloads()
             return

         if full_filename is None:
@@ -2918,11 +2884,11 @@ def process_info(self, info_dict):
             else:
                 try:
                     self.to_screen('[info] Writing video annotations to: ' + annofn)
-                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+                    with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                         annofile.write(info_dict['annotations'])
                 except (KeyError, TypeError):
                     self.report_warning('There are no annotations to write.')
-                except (OSError, IOError):
+                except OSError:
                     self.report_error('Cannot write annotations file: ' + annofn)
                     return

@@ -2941,13 +2907,13 @@ def _write_link_file(link_type):
                 return True
             try:
                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
-                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
-                             newline='\r\n' if link_type == 'url' else '\n') as linkfile:
+                with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
+                          newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                     template_vars = {'url': url}
                     if link_type == 'desktop':
                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
-            except (OSError, IOError):
+            except OSError:
                 self.report_error(f'Cannot write internet shortcut {linkfn}')
                 return False
             return True
@@ -2974,12 +2940,8 @@ def replace_info_dict(new_info):
             info_dict.clear()
             info_dict.update(new_info)

-        try:
-            new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
-            replace_info_dict(new_info)
-        except PostProcessingError as err:
-            self.report_error('Preprocessing: %s' % str(err))
-            return
+        new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
+        replace_info_dict(new_info)

         if self.params.get('skip_download'):
             info_dict['filepath'] = temp_filename
@@ -3002,6 +2964,16 @@ def existing_video_file(*filepaths):
                 return file

         success = True
+        merger, fd = FFmpegMergerPP(self), None
+        if info_dict.get('protocol') or info_dict.get('url'):
+            fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
+            if fd is not FFmpegFD and (
+                    info_dict.get('section_start') or info_dict.get('section_end')):
+                msg = ('This format cannot be partially downloaded' if merger.available
+                       else 'You have requested downloading the video partially, but ffmpeg is not installed')
+                self.report_error(f'{msg}. Aborting')
+                return
+
         if info_dict.get('requested_formats') is not None:

             def compatible_formats(formats):
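A rough paraphrase of the guard added above, as a standalone function (names are simplified; ffmpeg_fd stands in for the FFmpegFD class): time-range downloads are only possible when ffmpeg acts as the downloader.

    def partial_download_error(info_dict, fd, ffmpeg_fd, ffmpeg_available):
        """Return an error message if a section download cannot proceed, else None."""
        wants_section = info_dict.get('section_start') or info_dict.get('section_end')
        if fd is not ffmpeg_fd and wants_section:
            return ('This format cannot be partially downloaded' if ffmpeg_available
                    else 'You have requested downloading the video partially, '
                         'but ffmpeg is not installed')
        return None  # safe to continue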
@@ -3012,10 +2984,10 @@ def compatible_formats(formats):
                     return False

                 # Check extension
-                exts = set(format.get('ext') for format in formats)
+                exts = {format.get('ext') for format in formats}
                 COMPATIBLE_EXTS = (
-                    set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
-                    set(('webm',)),
+                    {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
+                    {'webm'},
                 )
                 for ext_sets in COMPATIBLE_EXTS:
                     if ext_sets.issuperset(exts):
@@ -3034,7 +3006,7 @@ def compatible_formats(formats):
                     and info_dict.get('thumbnails')
                     # check with type instead of pp_key, __name__, or isinstance
                     # since we dont want any custom PPs to trigger this
-                    and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
+                    and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                 info_dict['ext'] = 'mkv'
                 self.report_warning(
                     'webm doesn\'t support embedding a thumbnail, mkv will be used')
@@ -3048,7 +3020,7 @@ def correct_ext(filename, ext=new_ext):
                     os.path.splitext(filename)[0]
                     if filename_real_ext in (old_ext, new_ext)
                     else filename)
-                return '%s.%s' % (filename_wo_ext, ext)
+                return f'{filename_wo_ext}.{ext}'

             # Ensure filename always has a correct extension for successful merge
             full_filename = correct_ext(full_filename)
@@ -3057,9 +3029,6 @@ def correct_ext(filename, ext=new_ext):
                 info_dict['__real_download'] = False

             downloaded = []
-            merger = FFmpegMergerPP(self)
-
-            fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
             if dl_filename is not None:
                 self.report_file_already_downloaded(dl_filename)
             elif fd:
@@ -3133,12 +3102,13 @@ def correct_ext(filename, ext=new_ext):
         except network_exceptions as err:
             self.report_error('unable to download video data: %s' % error_to_compat_str(err))
             return
-        except (OSError, IOError) as err:
+        except OSError as err:
             raise UnavailableVideoError(err)
         except (ContentTooShortError, ) as err:
-            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+            self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
             return

+        self._raise_pending_errors(info_dict)
         if success and full_filename != '-':

             def fixup():
@@ -3149,16 +3119,16 @@ def fixup():
                 if fixup_policy in ('ignore', 'never'):
                     return
                 elif fixup_policy == 'warn':
-                    do_fixup = False
+                    do_fixup = 'warn'
                 elif fixup_policy != 'force':
                     assert fixup_policy in ('detect_or_warn', None)
                     if not info_dict.get('__real_download'):
                         do_fixup = False

                 def ffmpeg_fixup(cndn, msg, cls):
-                    if not cndn:
+                    if not (do_fixup and cndn):
                         return
-                    if not do_fixup:
+                    elif do_fixup == 'warn':
                         self.report_warning(f'{vid}: {msg}')
                         return
                     pp = cls(self)
@@ -3181,17 +3151,18 @@ def ffmpeg_fixup(cndn, msg, cls):
                         FFmpegFixupM4aPP)

                 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
-                downloader = downloader.__name__ if downloader else None
+                downloader = downloader.FD_NAME if downloader else None

                 if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
-                    ffmpeg_fixup(downloader == 'HlsFD',
+                    ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
+                                 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                  'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                  FFmpegFixupM3u8PP)
                     ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
                                  'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

-                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
-                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
+                    ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
+                    ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

             fixup()
             try:
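The fixup logic above turns do_fixup into a three-valued flag. A condensed sketch (with the closure rewritten as parameters, which is not how the diff structures it):

    def ffmpeg_fixup(do_fixup, cndn, msg, run_pp, warn):
        # do_fixup: False = fixups disabled, 'warn' = report only, truthy = actually fix
        if not (do_fixup and cndn):
            return                 # nothing detected, or fixups switched off
        elif do_fixup == 'warn':
            warn(msg)              # policy 'warn': report the problem, leave the file alone
            return
        run_pp(msg)                # policies 'force'/'detect_or_warn': run the fixup postprocessor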
@@ -3207,15 +3178,10 @@ def ffmpeg_fixup(cndn, msg, cls):
                 return

         info_dict['__write_download_archive'] = True
+        assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
         if self.params.get('force_write_download_archive'):
             info_dict['__write_download_archive'] = True
-
-        # Make sure the info_dict was modified in-place
-        assert info_dict is original_infodict
-
-        max_downloads = self.params.get('max_downloads')
-        if max_downloads is not None and self._num_downloads >= int(max_downloads):
-            raise MaxDownloadsReached()
+        check_max_downloads()

     def __download_wrapper(self, func):
         @functools.wraps(func)
@@ -3224,9 +3190,6 @@ def wrapper(*args, **kwargs):
                 res = func(*args, **kwargs)
             except UnavailableVideoError as e:
                 self.report_error(e)
-            except MaxDownloadsReached as e:
-                self.to_screen(f'[info] {e}')
-                raise
             except DownloadCancelled as e:
                 self.to_screen(f'[info] {e}')
                 if not self.params.get('break_per_url'):
@@ -3281,9 +3244,9 @@ def sanitize_info(info_dict, remove_private_keys=False):
         info_dict.setdefault('_type', 'video')

         if remove_private_keys:
-            reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
+            reject = lambda k, v: v is None or k.startswith('__') or k in {
                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
-                'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
+                'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
             }
         else:
             reject = lambda k, v: False
@@ -3305,6 +3268,17 @@ def filter_requested_info(info_dict, actually_filter=True):
         ''' Alias of sanitize_info for backward compatibility '''
         return YoutubeDL.sanitize_info(info_dict, actually_filter)

+    def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
+        for filename in set(filter(None, files_to_delete)):
+            if msg:
+                self.to_screen(msg % filename)
+            try:
+                os.remove(filename)
+            except OSError:
+                self.report_warning(f'Unable to delete file {filename}')
+            if filename in info.get('__files_to_move', []):  # NB: Delete even if None
+                del info['__files_to_move'][filename]
+
     @staticmethod
     def post_extract(info_dict):
         def actual_post_extract(info_dict):
@@ -3337,14 +3311,8 @@ def run_pp(self, pp, infodict):
             for f in files_to_delete:
                 infodict['__files_to_move'].setdefault(f, '')
         else:
-            for old_filename in set(files_to_delete):
-                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
-                try:
-                    os.remove(encodeFilename(old_filename))
-                except (IOError, OSError):
-                    self.report_warning('Unable to remove downloaded original file')
-                if old_filename in infodict['__files_to_move']:
-                    del infodict['__files_to_move'][old_filename]
+            self._delete_downloaded_files(
+                *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
         return infodict

     def run_all_pps(self, key, info, *, additional_pps=None):
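Hypothetical usage of the consolidated helper defined earlier in this hunk series; ydl and info_dict are assumed to exist and the file names are invented. Duplicate and falsy entries are dropped via set(filter(None, ...)), and matching __files_to_move bookkeeping is cleaned up alongside.

    ydl._delete_downloaded_files(
        'video.f137.mp4', 'video.f140.m4a', None,   # None entries are ignored
        info=info_dict,
        msg='Deleting original file %s (pass -k to keep)')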
@@ -3356,7 +3324,12 @@ def run_all_pps(self, key, info, *, additional_pps=None):
     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
         info = dict(ie_info)
         info['__files_to_move'] = files_to_move or {}
-        info = self.run_all_pps(key, info)
+        try:
+            info = self.run_all_pps(key, info)
+        except PostProcessingError as err:
+            msg = f'Preprocessing: {err}'
+            info.setdefault('__pending_error', msg)
+            self.report_error(msg, is_error=False)
         return info, info.pop('__files_to_move', None)

     def post_process(self, filename, info, files_to_move=None):
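The try/except added above defers pre-processing failures instead of aborting immediately: the message is stored under __pending_error and reported as a non-fatal error, to be re-raised later via self._raise_pending_errors(...) (whose body is not shown in this part of the diff). A simplified sketch of the pattern, with plain stand-ins for the yt-dlp types:

    def pre_process(info, run_pps, report_error):
        try:
            info = run_pps(info)
        except Exception as err:                      # PostProcessingError in the real code
            msg = f'Preprocessing: {err}'
            info.setdefault('__pending_error', msg)   # remember only the first failure
            report_error(msg, is_error=False)         # warn now, abort later
        return info

    def raise_pending_errors(info):                   # assumed shape of _raise_pending_errors
        msg = info.pop('__pending_error', None)
        if msg:
            raise RuntimeError(msg)                   # PostProcessingError in the real code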
@@ -3386,7 +3359,7 @@ def _make_archive_id(self, info_dict):
                 break
             else:
                 return
-        return '%s %s' % (extractor.lower(), video_id)
+        return f'{extractor.lower()} {video_id}'

     def in_download_archive(self, info_dict):
         fn = self.params.get('download_archive')
@@ -3426,7 +3399,7 @@ def format_resolution(format, default='unknown'):

     def _list_format_headers(self, *headers):
         if self.params.get('listformats_table', True) is not False:
-            return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
+            return [self._format_out(header, self.Styles.HEADERS) for header in headers]
         return headers

     def _format_note(self, fdict):
@@ -3504,10 +3477,10 @@ def render_formats_table(self, info_dict):
                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

-        delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
+        delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
         table = [
             [
-                self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
+                self._format_out(format_field(f, 'format_id'), self.Styles.ID),
                 format_field(f, 'ext'),
                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
                 format_field(f, 'fps', '\t%d'),
@@ -3519,15 +3492,15 @@ def render_formats_table(self, info_dict):
                 delim,
                 format_field(f, 'vcodec', default='unknown').replace(
                     'none', 'images' if f.get('acodec') == 'none'
-                            else self._format_screen('audio only', self.Styles.SUPPRESS)),
+                            else self._format_out('audio only', self.Styles.SUPPRESS)),
                 format_field(f, 'vbr', '\t%dk'),
                 format_field(f, 'acodec', default='unknown').replace(
                     'none', '' if f.get('vcodec') == 'none'
-                            else self._format_screen('video only', self.Styles.SUPPRESS)),
+                            else self._format_out('video only', self.Styles.SUPPRESS)),
                 format_field(f, 'abr', '\t%dk'),
                 format_field(f, 'asr', '\t%dHz'),
                 join_nonempty(
-                    self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
+                    self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                     format_field(f, 'language', '[%s]'),
                     join_nonempty(format_field(f, 'format_note'),
                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
@@ -3540,7 +3513,7 @@ def render_formats_table(self, info_dict):
         return render_table(
             header_line, table, hide_empty=True,
-            delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
+            delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))

     def render_thumbnails_table(self, info_dict):
         thumbnails = list(info_dict.get('thumbnails') or [])
@@ -3583,7 +3556,7 @@ def list_subtitles(self, video_id, subtitles, name='subtitles'):

     def urlopen(self, req):
         """ Start an HTTP download """
-        if isinstance(req, compat_basestring):
+        if isinstance(req, str):
             req = sanitized_Request(req)
         return self._opener.open(req, timeout=self._socket_timeout)

@@ -3591,18 +3564,25 @@ def print_debug_header(self):
         if not self.params.get('verbose'):
             return

+        # These imports can be slow. So import them only as needed
+        from .extractor.extractors import _LAZY_LOADER
+        from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
+
         def get_encoding(stream):
             ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
             if not supports_terminal_sequences(stream):
-                from .compat import WINDOWS_VT_MODE
+                from .utils import WINDOWS_VT_MODE  # Must be imported locally
                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
             return ret

-        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
+        encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
             locale.getpreferredencoding(),
             sys.getfilesystemencoding(),
-            get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
-            self.get_encoding())
+            self.get_encoding(),
+            ', '.join(
+                f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
+                if stream is not None and key != 'console')
+        )

         logger = self.params.get('logger')
         if logger:
@@ -3627,24 +3607,20 @@ def get_encoding(stream):
         write_debug('Plugins: %s' % [
             '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
             for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
-        if self.params.get('compat_opts'):
-            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
+        if self.params['compat_opts']:
+            write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

         if source == 'source':
             try:
-                sp = Popen(
+                stdout, _, _ = Popen.run(
                     ['git', 'rev-parse', '--short', 'HEAD'],
-                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                    cwd=os.path.dirname(os.path.abspath(__file__)))
-                out, err = sp.communicate_or_kill()
-                out = out.decode().strip()
-                if re.match('[0-9a-f]+', out):
-                    write_debug('Git HEAD: %s' % out)
+                    text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
+                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                if re.fullmatch('[0-9a-f]+', stdout.strip()):
+                    write_debug(f'Git HEAD: {stdout.strip()}')
             except Exception:
-                try:
+                with contextlib.suppress(Exception):
                     sys.exc_clear()
-                except Exception:
-                    pass

         def python_implementation():
             impl_name = platform.python_implementation()
@@ -3661,7 +3637,7 @@ def python_implementation():
         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
         if ffmpeg_features:
-            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
+            exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
         exe_versions['rtmpdump'] = rtmpdump_version()
         exe_versions['phantomjs'] = PhantomJSwrapper._version()
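A self-contained approximation of the Git-HEAD probe above. Popen.run is yt-dlp's own wrapper; plain subprocess.run stands in for it here, and the suppression mirrors the diff's contextlib.suppress(Exception).

    import contextlib
    import re
    import subprocess

    with contextlib.suppress(Exception):
        proc = subprocess.run(
            ['git', 'rev-parse', '--short', 'HEAD'],
            text=True, capture_output=True)
        # re.fullmatch (unlike the old re.match) rejects output that merely
        # *starts* with hex digits, such as an error message
        if re.fullmatch('[0-9a-f]+', proc.stdout.strip()):
            print(f'Git HEAD: {proc.stdout.strip()}')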
@@ -3670,20 +3646,14 @@ def python_implementation():
         ) or 'none'
         write_debug('exe versions: %s' % exe_str)

-        from .downloader.websocket import has_websockets
-        from .postprocessor.embedthumbnail import has_mutagen
-        from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
+        from .compat.compat_utils import get_package_info
+        from .dependencies import available_dependencies

-        lib_str = join_nonempty(
-            compat_brotli and compat_brotli.__name__,
-            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
-            SECRETSTORAGE_AVAILABLE and 'secretstorage',
-            has_mutagen and 'mutagen',
-            SQLITE_AVAILABLE and 'sqlite',
-            has_websockets and 'websockets',
-            delim=', ') or 'none'
-        write_debug('Optional libraries: %s' % lib_str)
+        write_debug('Optional libraries: %s' % (', '.join(sorted({
+            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
+        })) or 'none'))

+        self._setup_opener()
         proxy_map = {}
         for handler in self._opener.handlers:
             if hasattr(handler, 'proxies'):
@@ -3692,10 +3662,10 @@ def python_implementation():

         # Not implemented
         if False and self.params.get('call_home'):
-            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
+            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
             write_debug('Public IP address: %s' % ipaddr)
             latest_version = self.urlopen(
-                'https://yt-dl.org/latest/version').read().decode('utf-8')
+                'https://yt-dl.org/latest/version').read().decode()
             if version_tuple(latest_version) > version_tuple(__version__):
                 self.report_warning(
                     'You are using an outdated version (newest version: %s)! '
@@ -3703,6 +3673,8 @@ def python_implementation():
                     latest_version)

     def _setup_opener(self):
+        if hasattr(self, '_opener'):
+            return
         timeout_val = self.params.get('socket_timeout')
         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

@@ -3729,7 +3701,7 @@ def _setup_opener(self):
         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
         redirect_handler = YoutubeDLRedirectHandler()
-        data_handler = compat_urllib_request_DataHandler()
+        data_handler = urllib.request.DataHandler()

         # When passing our own FileHandler instance, build_opener won't add the
         # default FileHandler and allows us to disable the file protocol, which
@@ -3767,7 +3739,7 @@ def get_encoding(self):
         return encoding

     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
-        ''' Write infojson and returns True = written, False = skip, None = error '''
+        ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
         if overwrite is None:
             overwrite = self.params.get('overwrites', True)
         if not self.params.get('writeinfojson'):
@@ -3779,14 +3751,15 @@ def _write_info_json(self, label, ie_result, infofn, overwrite=None):
             return None
         elif not overwrite and os.path.exists(infofn):
             self.to_screen(f'[info] {label.title()} metadata is already present')
-        else:
-            self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
-            try:
-                write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
-            except (OSError, IOError):
-                self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
-                return None
-        return True
+            return 'exists'
+
+        self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
+        try:
+            write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
+            return True
+        except OSError:
+            self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
+            return None

     def _write_description(self, label, ie_result, descfn):
         ''' Write description and returns True = written, False = skip, None = error '''
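The updated docstring above spells out a four-way contract. A hypothetical caller (ydl and ie_result are assumed to exist) would distinguish the cases like so:

    ret = ydl._write_info_json('video', ie_result, 'video.info.json')
    if ret is None:
        ...        # error: the file could not be written
    elif ret is False:
        ...        # skipped: writing the infojson was not requested
    elif ret == 'exists':
        ...        # file already present and overwrites are disabled
    else:          # True
        ...        # freshly written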
@@ -3805,9 +3778,9 @@ def _write_description(self, label, ie_result, descfn):
         else:
             try:
                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
-                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+                with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                     descfile.write(ie_result['description'])
-            except (OSError, IOError):
+            except OSError:
                 self.report_error(f'Cannot write {label} description file {descfn}')
                 return None
         return True
@@ -3841,12 +3814,12 @@ def _write_subtitles(self, info_dict, filename):
                 try:
                     # Use newline='' to prevent conversion of newline characters
                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
-                    with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
+                    with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                         subfile.write(sub_info['data'])
                     sub_info['filepath'] = sub_filename
                     ret.append((sub_filename, sub_filename_final))
                     continue
-                except (OSError, IOError):
+                except OSError:
                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
                     return None

@@ -3857,9 +3830,12 @@ def _write_subtitles(self, info_dict, filename):
                 sub_info['filepath'] = sub_filename
                 ret.append((sub_filename, sub_filename_final))
             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
+                msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
-                    raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
-                self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
+                    if not self.params.get('ignoreerrors'):
+                        self.report_error(msg)
+                    raise DownloadError(msg)
+                self.report_warning(msg)
         return ret

     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):