#!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import absolute_import, unicode_literals
-
import collections
import contextlib
import datetime
import operator
import os
import platform
+import random
import re
import shutil
import subprocess
import time
import tokenize
import traceback
-import random
import unicodedata
-
-from enum import Enum
+import urllib.request
from string import ascii_letters
+from .cache import Cache
from .compat import (
- compat_basestring,
+ HAS_LEGACY as compat_has_legacy,
compat_get_terminal_size,
- compat_kwargs,
- compat_numeric_types,
compat_os_name,
- compat_pycrypto_AES,
compat_shlex_quote,
compat_str,
- compat_tokenize_tokenize,
compat_urllib_error,
compat_urllib_request,
- compat_urllib_request_DataHandler,
- windows_enable_vt_mode,
)
from .cookies import load_cookies
+from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
+from .downloader.rtmp import rtmpdump_version
+from .extractor import gen_extractor_classes, get_info_extractor
+from .extractor.openload import PhantomJSwrapper
+from .minicurses import format_text
+from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
+from .postprocessor import (
+ EmbedThumbnailPP,
+ FFmpegFixupDuplicateMoovPP,
+ FFmpegFixupDurationPP,
+ FFmpegFixupM3u8PP,
+ FFmpegFixupM4aPP,
+ FFmpegFixupStretchedPP,
+ FFmpegFixupTimestampPP,
+ FFmpegMergerPP,
+ FFmpegPostProcessor,
+ MoveFilesAfterDownloadPP,
+ get_postprocessor,
+)
+from .update import detect_variant
from .utils import (
+ DEFAULT_OUTTMPL,
+ LINK_TEMPLATES,
+ NO_DEFAULT,
+ NUMBER_RE,
+ OUTTMPL_TYPES,
+ POSTPROCESS_WHEN,
+ STR_FORMAT_RE_TMPL,
+ STR_FORMAT_TYPES,
+ ContentTooShortError,
+ DateRange,
+ DownloadCancelled,
+ DownloadError,
+ EntryNotInPlaylist,
+ ExistingVideoReached,
+ ExtractorError,
+ GeoRestrictedError,
+ HEADRequest,
+ ISO3166Utils,
+ LazyList,
+ MaxDownloadsReached,
+ Namespace,
+ PagedList,
+ PerRequestProxyHandler,
+ PlaylistEntries,
+ Popen,
+ PostProcessingError,
+ ReExtractInfo,
+ RejectedVideoReached,
+ SameFileError,
+ UnavailableVideoError,
+ YoutubeDLCookieProcessor,
+ YoutubeDLHandler,
+ YoutubeDLRedirectHandler,
age_restricted,
args_to_str,
- ContentTooShortError,
date_from_str,
- DateRange,
- DEFAULT_OUTTMPL,
determine_ext,
determine_protocol,
- DownloadCancelled,
- DownloadError,
encode_compat_str,
encodeFilename,
- EntryNotInPlaylist,
error_to_compat_str,
- ExistingVideoReached,
expand_path,
- ExtractorError,
+ filter_dict,
float_or_none,
format_bytes,
+ format_decimal_suffix,
format_field,
formatSeconds,
- GeoRestrictedError,
get_domain,
- HEADRequest,
int_or_none,
iri_to_uri,
- ISO3166Utils,
join_nonempty,
- LazyList,
- LINK_TEMPLATES,
locked_file,
make_dir,
make_HTTPS_handler,
- MaxDownloadsReached,
+ merge_headers,
network_exceptions,
number_of_digits,
orderedSet,
- OUTTMPL_TYPES,
- PagedList,
parse_filesize,
- PerRequestProxyHandler,
platform_name,
- Popen,
- PostProcessingError,
preferredencoding,
prepend_extension,
- ReExtractInfo,
register_socks_protocols,
- RejectedVideoReached,
remove_terminal_sequences,
render_table,
replace_extension,
- SameFileError,
sanitize_filename,
sanitize_path,
sanitize_url,
sanitized_Request,
std_headers,
- STR_FORMAT_RE_TMPL,
- STR_FORMAT_TYPES,
str_or_none,
strftime_or_none,
subtitles_filename,
to_high_limit_path,
traverse_obj,
try_get,
- UnavailableVideoError,
url_basename,
variadic,
version_tuple,
+ windows_enable_vt_mode,
write_json_file,
write_string,
- YoutubeDLCookieProcessor,
- YoutubeDLHandler,
- YoutubeDLRedirectHandler,
)
-from .cache import Cache
-from .minicurses import format_text
-from .extractor import (
- gen_extractor_classes,
- get_info_extractor,
- _LAZY_LOADER,
- _PLUGIN_CLASSES as plugin_extractors
-)
-from .extractor.openload import PhantomJSwrapper
-from .downloader import (
- FFmpegFD,
- get_suitable_downloader,
- shorten_protocol_name
-)
-from .downloader.rtmp import rtmpdump_version
-from .postprocessor import (
- get_postprocessor,
- EmbedThumbnailPP,
- FFmpegFixupDuplicateMoovPP,
- FFmpegFixupDurationPP,
- FFmpegFixupM3u8PP,
- FFmpegFixupM4aPP,
- FFmpegFixupStretchedPP,
- FFmpegFixupTimestampPP,
- FFmpegMergerPP,
- FFmpegPostProcessor,
- MoveFilesAfterDownloadPP,
- _PLUGIN_CLASSES as plugin_postprocessors
-)
-from .update import detect_variant
-from .version import __version__, RELEASE_GIT_HEAD
+from .version import RELEASE_GIT_HEAD, __version__
if compat_os_name == 'nt':
import ctypes
-class YoutubeDL(object):
+class YoutubeDL:
"""YoutubeDL class.
YoutubeDL objects are the ones responsible of downloading the
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
no_warnings: Do not print out anything for warnings.
- forceprint: A list of templates to force print
- forceurl: Force printing final URL. (Deprecated)
- forcetitle: Force printing title. (Deprecated)
- forceid: Force printing ID. (Deprecated)
- forcethumbnail: Force printing thumbnail URL. (Deprecated)
- forcedescription: Force printing description. (Deprecated)
- forcefilename: Force printing final filename. (Deprecated)
- forceduration: Force printing duration. (Deprecated)
+ forceprint: A dict with keys WHEN mapped to a list of templates to
+ print to stdout. The allowed keys are video or any of the
+ items in utils.POSTPROCESS_WHEN.
+ For compatibility, a single list is also accepted
+ print_to_file: A dict with keys WHEN (same as forceprint) mapped to
+ a list of tuples with (template, filename)
forcejson: Force printing info_dict as JSON.
dump_single_json: Force printing the info_dict of the whole playlist
(or video) as a single JSON line.
See "Sorting Formats" for more details.
format_sort_force: Force the given format_sort. see "Sorting Formats"
for more details.
+ prefer_free_formats: Whether to prefer video formats with free containers
+ over non-free ones of same quality.
allow_multiple_video_streams: Allow multiple video streams to be merged
into a single file
allow_multiple_audio_streams: Allow multiple audio streams to be merged
writedesktoplink: Write a Linux internet shortcut file (.desktop)
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
- allsubtitles: Deprecated - Use subtitleslangs = ['all']
- Downloads all the subtitles of the video
- (requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
subtitlesformat: The format code for subtitles
subtitleslangs: List of languages of the subtitles to download (can be regex).
has been filtered out.
break_per_url: Whether break_on_reject and break_on_existing
should act on each input URL as opposed to for the entire queue
- cookiefile: File name where cookies should be read from and dumped to
- cookiesfrombrowser: A tuple containing the name of the browser and the profile
- name/path from where cookies are loaded.
- Eg: ('chrome', ) or ('vivaldi', 'default')
- nocheckcertificate:Do not verify SSL certificates
+ cookiefile: File name or text stream from where cookies should be read and dumped to
+ cookiesfrombrowser: A tuple containing the name of the browser, the profile
+ name/pathfrom where cookies are loaded, and the name of the
+ keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
+ legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
+ support RFC 5746 secure renegotiation
+ nocheckcertificate: Do not verify SSL certificates
+ client_certificate: Path to client certificate file in PEM format. May include the private key
+ client_certificate_key: Path to private key file for client certificate
+ client_certificate_password: Password for client certificate private key, if encrypted.
+ If not provided and the key is encrypted, yt-dlp will ask interactively
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
+ http_headers: A dictionary of custom headers to be used for all requests
proxy: URL of the proxy server to use
geo_verification_proxy: URL of the proxy to use for IP address verification
on geo-restricted sites.
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fridibi
debug_printtraffic:Print out sent and received HTTP traffic
- include_ads: Download ads as well (deprecated)
default_search: Prepend this string if an input url is not valid.
'auto' for elaborate guessing
encoding: Use this encoding instead of the system-specified.
postprocessors: A list of dictionaries, each with an entry
* key: The name of the postprocessor. See
yt_dlp/postprocessor/__init__.py for a list.
- * when: When to run the postprocessor. Can be one of
- pre_process|before_dl|post_process|after_move.
+ * when: When to run the postprocessor. Allowed values are
+ the entries of utils.POSTPROCESS_WHEN
Assumed to be 'post_process' if not given
- post_hooks: Deprecated - Register a custom postprocessor instead
- A list of functions that get called as the final step
- for each video file, after all postprocessors have been
- called. The filename will be passed as the only argument.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
* status: One of "downloading", "error", or "finished".
- "detect_or_warn": check whether we can do anything
about it, warn otherwise (default)
source_address: Client-side IP address to bind to.
- call_home: Boolean, true iff we are allowed to contact the
- yt-dlp servers for debugging. (BROKEN)
sleep_interval_requests: Number of seconds to sleep between requests
during extraction
sleep_interval: Number of seconds to sleep before each download when
sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
listformats: Print an overview of available video formats and exit.
list_thumbnails: Print a table of all thumbnails and exit.
- match_filter: A function that gets called with the info_dict of
- every video.
- If it returns a message, the video is ignored.
- If it returns None, the video is downloaded.
+ match_filter: A function that gets called for every video with the signature
+ (info_dict, *, incomplete: bool) -> Optional[str]
+ For backward compatibility with youtube-dl, the signature
+ (info_dict) -> Optional[str] is also allowed.
+ - If it returns a message, the video is ignored.
+ - If it returns None, the video is downloaded.
+ - If it returns utils.NO_DEFAULT, the user is interactively
+ asked whether to download the video.
match_filter_func in utils.py is one example for this.
no_color: Do not emit color codes in output.
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
geo_bypass_ip_block:
IP range in CIDR notation that will be used similarly to
geo_bypass_country
-
- The following options determine which downloader is picked:
external_downloader: A dictionary of protocol keys and the executable of the
external downloader to use for it. The allowed protocols
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
Set the value to 'native' to use the native downloader
- hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
- or {'m3u8': 'ffmpeg'} instead.
- Use the native HLS downloader instead of ffmpeg/avconv
- if True, otherwise use ffmpeg/avconv if False, otherwise
- use downloader suggested by extractor if None.
compat_opts: Compatibility options. See "Differences in default behavior".
The following options do not work when used through the API:
filename, abort-on-error, multistreams, no-live-chat, format-sort
Allowed keys are 'download', 'postprocess',
'download-title' (console title) and 'postprocess-title'.
The template is mapped on a dictionary with keys 'progress' and 'info'
+ retry_sleep_functions: Dictionary of functions that takes the number of attempts
+ as argument and returns the time to sleep in seconds.
+ Allowed keys are 'http', 'fragment', 'file_access'
+ download_ranges: A function that gets called for every video with the signature
+ (info_dict, *, ydl) -> Iterable[Section].
+ Only the returned sections will be downloaded. Each Section contains:
+ * start_time: Start time of the section in seconds
+ * end_time: End time of the section in seconds
+ * title: Section title (Optional)
+ * index: Section number (Optional)
The following parameters are not used by YoutubeDL itself, they are used by
the downloader (see yt_dlp/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
- max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
- noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
+ max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
+ continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
external_downloader_args, concurrent_fragment_downloads.
The following options are used by the post processors:
- prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
- otherwise prefer ffmpeg. (avconv support is deprecated)
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
to the binary or its containing directory.
postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
extractor_args: A dictionary of arguments to be passed to the extractors.
See "EXTRACTOR ARGUMENTS" for details.
Eg: {'youtube': {'skip': ['dash', 'hls']}}
- youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
+ mark_watched: Mark videos watched (even with --simulate). Only for YouTube
+
+ The following options are deprecated and may be removed in the future:
+
+ forceurl: - Use forceprint
+ Force printing final URL.
+ forcetitle: - Use forceprint
+ Force printing title.
+ forceid: - Use forceprint
+ Force printing ID.
+ forcethumbnail: - Use forceprint
+ Force printing thumbnail URL.
+ forcedescription: - Use forceprint
+ Force printing description.
+ forcefilename: - Use forceprint
+ Force printing final filename.
+ forceduration: - Use forceprint
+ Force printing duration.
+ allsubtitles: - Use subtitleslangs = ['all']
+ Downloads all the subtitles of the video
+ (requires writesubtitles or writeautomaticsub)
+ include_ads: - Doesn't work
+ Download ads as well
+ call_home: - Not implemented
+ Boolean, true iff we are allowed to contact the
+ yt-dlp servers for debugging.
+ post_hooks: - Register a custom postprocessor
+ A list of functions that get called as the final step
+ for each video file, after all postprocessors have been
+ called. The filename will be passed as the only argument.
+ hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
+ Use the native HLS downloader instead of ffmpeg/avconv
+ if True, otherwise use ffmpeg/avconv if False, otherwise
+ use downloader suggested by extractor if None.
+ prefer_ffmpeg: - avconv support is deprecated
+ If False, use avconv instead of ffmpeg if both are available,
+ otherwise prefer ffmpeg.
+ youtube_include_dash_manifest: - Use extractor_args
If True (default), DASH manifests and related
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
care about DASH. (only for youtube)
- youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
+ youtube_include_hls_manifest: - Use extractor_args
If True (default), HLS manifests and related
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
care about HLS. (only for youtube)
"""
- _NUMERIC_FIELDS = set((
+ _NUMERIC_FIELDS = {
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
'timestamp', 'release_timestamp',
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
'start_time', 'end_time',
'chapter_number', 'season_number', 'episode_number',
'track_number', 'disc_number', 'release_year',
- ))
+ }
+ _format_fields = {
+ # NB: Keep in sync with the docstring of extractor/common.py
+ 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
+ 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
+ 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
+ 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
+ 'preference', 'language', 'language_preference', 'quality', 'source_preference',
+ 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
+ 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
+ }
_format_selection_exts = {
'audio': {'m4a', 'mp3', 'ogg', 'aac'},
'video': {'mp4', 'flv', 'webm', '3gp'},
'storyboards': {'mhtml'},
}
- params = None
- _ies = {}
- _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
- _printed_messages = set()
- _first_webpage_request = True
- _download_retcode = None
- _num_downloads = None
- _playlist_level = 0
- _playlist_urls = set()
- _screen_file = None
-
def __init__(self, params=None, auto_init=True):
"""Create a FileDownloader object with the given options.
@param auto_init Whether to load the default extractors and print header (if verbose).
"""
if params is None:
params = {}
+ self.params = params
self._ies = {}
self._ies_instances = {}
- self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
+ self._pps = {k: [] for k in POSTPROCESS_WHEN}
self._printed_messages = set()
self._first_webpage_request = True
self._post_hooks = []
self._postprocessor_hooks = []
self._download_retcode = 0
self._num_downloads = 0
- self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
- self._err_file = sys.stderr
- self.params = params
+ self._num_videos = 0
+ self._playlist_level = 0
+ self._playlist_urls = set()
self.cache = Cache(self)
windows_enable_vt_mode()
- self._allow_colors = {
- 'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
- 'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
- }
+ stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
+ self._out_files = Namespace(
+ out=stdout,
+ error=sys.stderr,
+ screen=sys.stderr if self.params.get('quiet') else stdout,
+ console=None if compat_os_name == 'nt' else next(
+ filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
+ )
+ self._allow_colors = Namespace(**{
+ type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
+ for type_, stream in self._out_files.items_ if type_ != 'console'
+ })
if sys.version_info < (3, 6):
self.report_warning(
def check_deprecated(param, option, suggestion):
if self.params.get(param) is not None:
- self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
+ self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
return True
return False
for msg in self.params.get('_deprecation_warnings', []):
self.deprecation_warning(msg)
- if 'list-formats' in self.params.get('compat_opts', []):
+ self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
+ if not compat_has_legacy:
+ self.params['compat_opts'].add('no-compat-legacy')
+ if 'list-formats' in self.params['compat_opts']:
self.params['listformats_table'] = False
if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
else:
self.params['nooverwrites'] = not self.params['overwrites']
- if params.get('bidi_workaround', False):
+ self.params.setdefault('forceprint', {})
+ self.params.setdefault('print_to_file', {})
+
+ # Compatibility with older syntax
+ if not isinstance(params['forceprint'], dict):
+ self.params['forceprint'] = {'video': params['forceprint']}
+
+ if self.params.get('bidi_workaround', False):
try:
import pty
master, slave = pty.openpty()
width = compat_get_terminal_size().columns
- if width is None:
- width_args = []
- else:
- width_args = ['-w', str(width)]
- sp_kwargs = dict(
- stdin=subprocess.PIPE,
- stdout=slave,
- stderr=self._err_file)
+ width_args = [] if width is None else ['-w', str(width)]
+ sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
try:
self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
except OSError:
else:
raise
+ if auto_init:
+ if auto_init != 'no_verbose_header':
+ self.print_debug_header()
+ self.add_default_info_extractors()
+
if (sys.platform != 'win32'
and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
- and not params.get('restrictfilenames', False)):
+ and not self.params.get('restrictfilenames', False)):
# Unicode filesystem API will throw errors (#1474, #13027)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
else self.params['format'] if callable(self.params['format'])
else self.build_format_selector(self.params['format']))
- self._setup_opener()
-
- if auto_init:
- if auto_init != 'no_verbose_header':
- self.print_debug_header()
- self.add_default_info_extractors()
+ # Set http_headers defaults according to std_headers
+ self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
hooks = {
'post_hooks': self.add_post_hook,
pp_def = dict(pp_def_raw)
when = pp_def.pop('when', 'post_process')
self.add_post_processor(
- get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
+ get_postprocessor(pp_def.pop('key'))(self, **pp_def),
when=when)
+ self._setup_opener()
register_socks_protocols()
def preload_download_archive(fn):
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file:
self.archive.add(line.strip())
- except IOError as ioe:
+ except OSError as ioe:
if ioe.errno != errno.ENOENT:
raise
return False
assert hasattr(self, '_output_process')
assert isinstance(message, compat_str)
line_count = message.count('\n') + 1
- self._output_process.stdin.write((message + '\n').encode('utf-8'))
+ self._output_process.stdin.write((message + '\n').encode())
self._output_process.stdin.flush()
- res = ''.join(self._output_channel.readline().decode('utf-8')
+ res = ''.join(self._output_channel.readline().decode()
for _ in range(line_count))
return res[:-len('\n')]
self._printed_messages.add(message)
write_string(message, out=out, encoding=self.params.get('encoding'))
- def to_stdout(self, message, skip_eol=False, quiet=False):
+ def to_stdout(self, message, skip_eol=False, quiet=None):
"""Print message to stdout"""
+ if quiet is not None:
+ self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
+ if skip_eol is not False:
+ self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
+ self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
+
+ def to_screen(self, message, skip_eol=False, quiet=None):
+ """Print message to screen if not in quiet mode"""
if self.params.get('logger'):
self.params['logger'].debug(message)
- elif not quiet or self.params.get('verbose'):
- self._write_string(
- '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
- self._err_file if quiet else self._screen_file)
+ return
+ if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
+ return
+ self._write_string(
+ '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
+ self._out_files.screen)
def to_stderr(self, message, only_once=False):
"""Print message to stderr"""
if self.params.get('logger'):
self.params['logger'].error(message)
else:
- self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
+ self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
+
+ def _send_console_code(self, code):
+ if compat_os_name == 'nt' or not self._out_files.console:
+ return
+ self._write_string(code, self._out_files.console)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
- elif 'TERM' in os.environ:
- self._write_string('\033]0;%s\007' % message, self._screen_file)
+ else:
+ self._send_console_code(f'\033]0;{message}\007')
def save_console_title(self):
- if not self.params.get('consoletitle', False):
+ if not self.params.get('consoletitle') or self.params.get('simulate'):
return
- if self.params.get('simulate'):
- return
- if compat_os_name != 'nt' and 'TERM' in os.environ:
- # Save the title on stack
- self._write_string('\033[22;0t', self._screen_file)
+ self._send_console_code('\033[22;0t') # Save the title on stack
def restore_console_title(self):
- if not self.params.get('consoletitle', False):
+ if not self.params.get('consoletitle') or self.params.get('simulate'):
return
- if self.params.get('simulate'):
- return
- if compat_os_name != 'nt' and 'TERM' in os.environ:
- # Restore the title from stack
- self._write_string('\033[23;0t', self._screen_file)
+ self._send_console_code('\033[23;0t') # Restore the title from stack
def __enter__(self):
self.save_console_title()
raise DownloadError(message, exc_info)
self._download_retcode = 1
- def to_screen(self, message, skip_eol=False):
- """Print message to stdout if not in quiet mode"""
- self.to_stdout(
- message, skip_eol, quiet=self.params.get('quiet', False))
-
- class Styles(Enum):
- HEADERS = 'yellow'
- EMPHASIS = 'light blue'
- ID = 'green'
- DELIM = 'blue'
- ERROR = 'red'
- WARNING = 'yellow'
- SUPPRESS = 'light black'
+ Styles = Namespace(
+ HEADERS='yellow',
+ EMPHASIS='light blue',
+ FILENAME='green',
+ ID='green',
+ DELIM='blue',
+ ERROR='red',
+ WARNING='yellow',
+ SUPPRESS='light black',
+ )
def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
+ text = str(text)
if test_encoding:
original_text = text
- encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
+ # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
+ encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
text = text.encode(encoding, 'ignore').decode(encoding)
if fallback is not None and text != original_text:
text = fallback
- if isinstance(f, self.Styles):
- f = f.value
return format_text(text, f) if allow_colors else text if fallback is None else fallback
+ def _format_out(self, *args, **kwargs):
+ return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
+
def _format_screen(self, *args, **kwargs):
- return self._format_text(
- self._screen_file, self._allow_colors['screen'], *args, **kwargs)
+ return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
def _format_err(self, *args, **kwargs):
- return self._format_text(
- self._err_file, self._allow_colors['err'], *args, **kwargs)
+ return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
def report_warning(self, message, only_once=False):
'''
def deprecation_warning(self, message):
if self.params.get('logger') is not None:
- self.params['logger'].warning('DeprecationWarning: {message}')
+ self.params['logger'].warning(f'DeprecationWarning: {message}')
else:
self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
'''Log debug message or Print message to stderr'''
if not self.params.get('verbose', False):
return
- message = '[debug] %s' % message
+ message = f'[debug] {message}'
if self.params.get('logger'):
self.params['logger'].debug(message)
else:
except UnicodeEncodeError:
self.to_screen('Deleting existing file')
- def raise_no_formats(self, info, forced=False):
- has_drm = info.get('__has_drm')
- msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
- expected = self.params.get('ignore_no_formats_error')
- if forced or not expected:
+ def raise_no_formats(self, info, forced=False, *, msg=None):
+ has_drm = info.get('_has_drm')
+ ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
+ msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
+ if forced or not ignored:
raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
- expected=has_drm or expected)
+ expected=has_drm or ignored or expected)
else:
self.report_warning(msg)
outtmpl_dict.update({
k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
if outtmpl_dict.get(k) is None})
- for key, val in outtmpl_dict.items():
+ for _, val in outtmpl_dict.items():
if isinstance(val, bytes):
- self.report_warning(
- 'Parameter outtmpl is bytes, but should be a unicode string. '
- 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
+ self.report_warning('Parameter outtmpl is bytes, but should be a unicode string')
return outtmpl_dict
def get_output_path(self, dir_type='', filename=None):
expand_path(paths.get('home', '').strip()),
expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
filename or '')
-
- # Temporary fix for #4787
- # 'Treat' all problem characters by passing filename through preferredencoding
- # to workaround encoding issues with subprocess on python2 @ Windows
- if sys.version_info < (3, 0) and sys.platform == 'win32':
- path = encodeFilename(path, True).decode(preferredencoding())
return sanitize_path(path, force=self.params.get('windowsfilenames'))
@staticmethod
# '%%' intact for template dict substitution step. Working around
# with boundary-alike separator hack.
sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
- outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+ outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
# outtmpl should be expand_path'ed before template dict substitution
# because meta fields may contain env variables we don't want to
def validate_outtmpl(cls, outtmpl):
''' @return None or Exception object '''
outtmpl = re.sub(
- STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
+ STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
lambda mobj: f'{mobj.group(0)[:-1]}s',
cls._outtmpl_expandpath(outtmpl))
try:
@staticmethod
def _copy_infodict(info_dict):
info_dict = dict(info_dict)
- for key in ('__original_infodict', '__postprocessors'):
- info_dict.pop(key, None)
+ info_dict.pop('__postprocessors', None)
+ info_dict.pop('__pending_error', None)
return info_dict
- def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
- """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
+ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
+ """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
+ @param sanitize Whether to sanitize the output as a filename.
+ For backward compatibility, a function can also be passed
+ """
+
info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
info_dict = self._copy_infodict(info_dict)
formatSeconds(info_dict['duration'], '-' if sanitize else ':')
if info_dict.get('duration', None) is not None
else None)
- info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+ info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
+ info_dict['video_autonumber'] = self._num_videos
if info_dict.get('resolution') is None:
info_dict['resolution'] = self.format_resolution(info_dict, default=None)
# For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
# of %(field)s to %(field)0Nd for backward compatibility
field_size_compat_map = {
- 'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
+ 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
'autonumber': self.params.get('autonumber_size') or 5,
}
TMPL_DICT = {}
- EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
+ EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
MATH_FUNCTIONS = {
'+': float.__add__,
'-': float.__sub__,
# Field is of the form key1.key2...
# where keys (except first) can be string, int or slice
FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
- MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
+ MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
- INTERNAL_FORMAT_RE = re.compile(r'''(?x)
+ INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
(?P<negate>-)?
- (?P<fields>{field})
- (?P<maths>(?:{math_op}{math_field})*)
+ (?P<fields>{FIELD_RE})
+ (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
(?:>(?P<strf_format>.+?))?
- (?P<alternate>(?<!\\),[^|&)]+)?
- (?:&(?P<replacement>.*?))?
- (?:\|(?P<default>.*?))?
- $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
+ (?P<remaining>
+ (?P<alternate>(?<!\\),[^|&)]+)?
+ (?:&(?P<replacement>.*?))?
+ (?:\|(?P<default>.*?))?
+ )$''')
def _traverse_infodict(k):
k = k.split('.')
na = self.params.get('outtmpl_na_placeholder', 'NA')
+ def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
+ return sanitize_filename(str(value), restricted=restricted, is_id=(
+ bool(re.search(r'(^|[_.])id(\.|$)', key))
+ if 'filename-sanitization' in self.params['compat_opts']
+ else NO_DEFAULT))
+
+ sanitizer = sanitize if callable(sanitize) else filename_sanitizer
+ sanitize = bool(sanitize)
+
def _dumpjson_default(obj):
if isinstance(obj, (set, LazyList)):
return list(obj)
return outer_mobj.group(0)
key = outer_mobj.group('key')
mobj = re.match(INTERNAL_FORMAT_RE, key)
- initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
+ initial_field = mobj.group('fields') if mobj else ''
value, replacement, default = None, None, na
while mobj:
mobj = mobj.groupdict()
value = get_value(mobj)
replacement = mobj['replacement']
if value is None and mobj['alternate']:
- mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
+ mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
else:
break
fmt = outer_mobj.group('format')
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
- fmt = '0{:d}d'.format(field_size_compat_map[key])
+ fmt = f'0{field_size_compat_map[key]:d}d'
value = default if value is None else value if replacement is None else replacement
str_fmt = f'{fmt[:-1]}s'
if fmt[-1] == 'l': # list
delim = '\n' if '#' in flags else ', '
- value, fmt = delim.join(variadic(value)), str_fmt
+ value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
elif fmt[-1] == 'j': # json
value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
elif fmt[-1] == 'q': # quoted
value = map(str, variadic(value) if '#' in flags else [value])
value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
elif fmt[-1] == 'B': # bytes
- value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
+ value = f'%{str_fmt}'.encode() % str(value).encode()
value, fmt = value.decode('utf-8', 'ignore'), 's'
elif fmt[-1] == 'U': # unicode normalized
value, fmt = unicodedata.normalize(
# "+" = compatibility equivalence, "#" = NFD
'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
value), str_fmt
+ elif fmt[-1] == 'D': # decimal suffix
+ num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
+ value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
+ factor=1024 if '#' in flags else 1000)
+ elif fmt[-1] == 'S': # filename sanitization
+ value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
elif fmt[-1] == 'c':
if value:
value = str(value)[0]
# So we convert it to repr first
value, fmt = repr(value), str_fmt
if fmt[-1] in 'csr':
- value = sanitize(initial_field, value)
+ value = sanitizer(initial_field, value)
key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
TMPL_DICT[key] = value
outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
return self.escape_outtmpl(outtmpl) % info_dict
- def _prepare_filename(self, info_dict, tmpl_type='default'):
+ def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
+ assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
+ if outtmpl is None:
+ outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
try:
- sanitize = lambda k, v: sanitize_filename(
- compat_str(v),
- restricted=self.params.get('restrictfilenames'),
- is_id=(k == 'id' or k.endswith('_id')))
- outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
- filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
+ outtmpl = self._outtmpl_expandpath(outtmpl)
+ filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
+ if not filename:
+ return None
- force_ext = OUTTMPL_TYPES.get(tmpl_type)
- if filename and force_ext is not None:
- filename = replace_extension(filename, force_ext, info_dict.get('ext'))
+ if tmpl_type in ('', 'temp'):
+ final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
+ if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
+ filename = replace_extension(filename, ext, final_ext)
+ elif tmpl_type:
+ force_ext = OUTTMPL_TYPES[tmpl_type]
+ if force_ext:
+ filename = replace_extension(filename, force_ext, info_dict.get('ext'))
# https://github.com/blackjack4494/youtube-dlc/issues/85
trim_file_name = self.params.get('trim_file_name', False)
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
- def prepare_filename(self, info_dict, dir_type='', warn=False):
- """Generate the output filename."""
-
- filename = self._prepare_filename(info_dict, dir_type or 'default')
+ def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
+ """Generate the output filename"""
+ if outtmpl:
+ assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
+ dir_type = None
+ filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
if not filename and dir_type not in ('', 'temp'):
return ''
if date is not None:
dateRange = self.params.get('daterange', DateRange())
if date not in dateRange:
- return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+ return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
view_count = info_dict.get('view_count')
if view_count is not None:
min_views = self.params.get('min_views')
except TypeError:
# For backward compatibility
ret = None if incomplete else match_filter(info_dict)
- if ret is not None:
+ if ret is NO_DEFAULT:
+ while True:
+ filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
+ reply = input(self._format_screen(
+ f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
+ if reply in {'y', ''}:
+ return None
+ elif reply == 'n':
+ return f'Skipping {video_title}'
+ elif ret is not None:
return ret
return None
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
- def __handle_extraction_exceptions(func):
+ def _handle_extraction_exceptions(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
- try:
- return func(self, *args, **kwargs)
- except GeoRestrictedError as e:
- msg = e.msg
- if e.countries:
- msg += '\nThis video is available in %s.' % ', '.join(
- map(ISO3166Utils.short2full, e.countries))
- msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
- self.report_error(msg)
- except ExtractorError as e: # An error we somewhat expected
- self.report_error(compat_str(e), e.format_traceback())
- except ReExtractInfo as e:
- if e.expected:
- self.to_screen(f'{e}; Re-extracting data')
- else:
- self.to_stderr('\r')
- self.report_warning(f'{e}; Re-extracting data')
- return wrapper(self, *args, **kwargs)
- except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
- raise
- except Exception as e:
- if self.params.get('ignoreerrors'):
- self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
- else:
+ while True:
+ try:
+ return func(self, *args, **kwargs)
+ except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
raise
+ except ReExtractInfo as e:
+ if e.expected:
+ self.to_screen(f'{e}; Re-extracting data')
+ else:
+ self.to_stderr('\r')
+ self.report_warning(f'{e}; Re-extracting data')
+ continue
+ except GeoRestrictedError as e:
+ msg = e.msg
+ if e.countries:
+ msg += '\nThis video is available in %s.' % ', '.join(
+ map(ISO3166Utils.short2full, e.countries))
+ msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
+ self.report_error(msg)
+ except ExtractorError as e: # An error we somewhat expected
+ self.report_error(str(e), e.format_traceback())
+ except Exception as e:
+ if self.params.get('ignoreerrors'):
+ self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
+ else:
+ raise
+ break
return wrapper
def _wait_for_video(self, ie_result):
min_wait, max_wait = self.params.get('wait_for_video')
diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
if diff is None and ie_result.get('live_status') == 'is_upcoming':
- diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
+ diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
self.report_warning('Release time of video is not known')
elif (diff or 0) <= 0:
self.report_warning('Video should already be available according to extracted info')
self.to_screen('')
raise
- @__handle_extraction_exceptions
+ @_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
self.add_extra_info(ie_result, {
'webpage_url': url,
'original_url': url,
- 'webpage_url_basename': url_basename(url),
- 'webpage_url_domain': get_domain(url),
+ })
+ webpage_url = ie_result.get('webpage_url')
+ if webpage_url:
+ self.add_extra_info(ie_result, {
+ 'webpage_url_basename': url_basename(webpage_url),
+ 'webpage_url_domain': get_domain(webpage_url),
})
if ie is not None:
self.add_extra_info(ie_result, {
self.add_extra_info(info_copy, extra_info)
info_copy, _ = self.pre_process(info_copy)
self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
+ self._raise_pending_errors(info_copy)
if self.params.get('force_write_download_archive', False):
self.record_download_archive(info_copy)
return ie_result
if result_type == 'video':
self.add_extra_info(ie_result, extra_info)
ie_result = self.process_video_result(ie_result, download=download)
+ self._raise_pending_errors(ie_result)
additional_urls = (ie_result or {}).get('additional_urls')
if additional_urls:
# TODO: Improve MetadataParserPP to allow setting a list
self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
ie_result['additional_entries'] = [
self.extract_info(
- url, download, extra_info,
+ url, download, extra_info=extra_info,
force_generic_extractor=self.params.get('force_generic_extractor'))
for url in additional_urls
]
if not info:
return info
- force_properties = dict(
- (k, v) for k, v in ie_result.items() if v is not None)
- for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
- if f in force_properties:
- del force_properties[f]
new_result = info.copy()
- new_result.update(force_properties)
+ new_result.update(filter_dict(ie_result, lambda k, v: (
+ v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
# Extracted info may not be a video result (i.e.
# info.get('_type', 'video') != video) but rather an url or
self._playlist_level += 1
self._playlist_urls.add(webpage_url)
+ self._fill_common_fields(ie_result, False)
self._sanitize_thumbnails(ie_result)
try:
return self.__process_playlist(ie_result, download)
def _ensure_dir_exists(self, path):
return make_dir(path, self.report_error)
- def __process_playlist(self, ie_result, download):
- # We process each entry in the playlist
- playlist = ie_result.get('title') or ie_result.get('id')
- self.to_screen('[download] Downloading playlist: %s' % playlist)
-
- if 'entries' not in ie_result:
- raise EntryNotInPlaylist('There are no entries')
-
- MissingEntry = object()
- incomplete_entries = bool(ie_result.get('requested_entries'))
- if incomplete_entries:
- def fill_missing_entries(entries, indices):
- ret = [MissingEntry] * max(indices)
- for i, entry in zip(indices, entries):
- ret[i - 1] = entry
- return ret
- ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
-
- playlist_results = []
-
- playliststart = self.params.get('playliststart', 1)
- playlistend = self.params.get('playlistend')
- # For backwards compatibility, interpret -1 as whole list
- if playlistend == -1:
- playlistend = None
-
- playlistitems_str = self.params.get('playlist_items')
- playlistitems = None
- if playlistitems_str is not None:
- def iter_playlistitems(format):
- for string_segment in format.split(','):
- if '-' in string_segment:
- start, end = string_segment.split('-')
- for item in range(int(start), int(end) + 1):
- yield int(item)
- else:
- yield int(string_segment)
- playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
-
- ie_entries = ie_result['entries']
- msg = (
- 'Downloading %d videos' if not isinstance(ie_entries, list)
- else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
-
- if isinstance(ie_entries, list):
- def get_entry(i):
- return ie_entries[i - 1]
- else:
- if not isinstance(ie_entries, (PagedList, LazyList)):
- ie_entries = LazyList(ie_entries)
-
- def get_entry(i):
- return YoutubeDL.__handle_extraction_exceptions(
- lambda self, i: ie_entries[i - 1]
- )(self, i)
-
- entries = []
- items = playlistitems if playlistitems is not None else itertools.count(playliststart)
- for i in items:
- if i == 0:
- continue
- if playlistitems is None and playlistend is not None and playlistend < i:
- break
- entry = None
- try:
- entry = get_entry(i)
- if entry is MissingEntry:
- raise EntryNotInPlaylist()
- except (IndexError, EntryNotInPlaylist):
- if incomplete_entries:
- raise EntryNotInPlaylist(f'Entry {i} cannot be found')
- elif not playlistitems:
- break
- entries.append(entry)
- try:
- if entry is not None:
- self._match_entry(entry, incomplete=True, silent=True)
- except (ExistingVideoReached, RejectedVideoReached):
- break
- ie_result['entries'] = entries
+ @staticmethod
+ def _playlist_infodict(ie_result, **kwargs):
+ return {
+ **ie_result,
+ 'playlist': ie_result.get('title') or ie_result.get('id'),
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ 'playlist_index': 0,
+ **kwargs,
+ }
- # Save playlist_index before re-ordering
- entries = [
- ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
- for i, entry in enumerate(entries, 1)
- if entry is not None]
- n_entries = len(entries)
+ def __process_playlist(self, ie_result, download):
+ """Process each entry in the playlist"""
+ title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
+ self.to_screen(f'[download] Downloading playlist: {title}')
- if not playlistitems and (playliststart != 1 or playlistend):
- playlistitems = list(range(playliststart, playliststart + n_entries))
- ie_result['requested_entries'] = playlistitems
+ all_entries = PlaylistEntries(self, ie_result)
+ entries = orderedSet(all_entries.get_requested_items())
+ ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])
+ n_entries, ie_result['playlist_count'] = len(entries), all_entries.full_count
_infojson_written = False
- if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
- ie_copy = {
- 'playlist': playlist,
- 'playlist_id': ie_result.get('id'),
- 'playlist_title': ie_result.get('title'),
- 'playlist_uploader': ie_result.get('uploader'),
- 'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': 0,
- 'n_entries': n_entries,
- }
- ie_copy.update(dict(ie_result))
-
+ write_playlist_files = self.params.get('allow_playlist_files', True)
+ if write_playlist_files and self.params.get('list_thumbnails'):
+ self.list_thumbnails(ie_result)
+ if write_playlist_files and not self.params.get('simulate'):
+ ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
_infojson_written = self._write_info_json(
'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
if _infojson_written is None:
if self.params.get('playlistrandom', False):
random.shuffle(entries)
- x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+ self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
+ f'{format_field(ie_result, "playlist_count", " of %s")}')
- self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
failures = 0
max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
- for i, entry_tuple in enumerate(entries, 1):
- playlist_index, entry = entry_tuple
+ for i, (playlist_index, entry) in enumerate(entries, 1):
+ # TODO: Add auto-generated fields
+ if self._match_entry(entry, incomplete=True) is not None:
+ continue
+
if 'playlist-index' in self.params.get('compat_opts', []):
- playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
- self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
- # This __x_forwarded_for_ip thing is a bit ugly but requires
- # minimal changes
- if x_forwarded_for:
- entry['__x_forwarded_for_ip'] = x_forwarded_for
- extra = {
+ playlist_index = ie_result['requested_entries'][i - 1]
+ self.to_screen('[download] Downloading video %s of %s' % (
+ self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
+
+ entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
+ entry_result = self.__process_iterable_entry(entry, download, {
'n_entries': n_entries,
- '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
+ '__last_playlist_index': max(ie_result['requested_entries']),
+ 'playlist_count': ie_result.get('playlist_count'),
'playlist_index': playlist_index,
'playlist_autonumber': i,
- 'playlist': playlist,
+ 'playlist': title,
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'webpage_url_domain': get_domain(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
- }
-
- if self._match_entry(entry, incomplete=True) is not None:
- continue
-
- entry_result = self.__process_iterable_entry(entry, download, extra)
+ })
if not entry_result:
failures += 1
if failures >= max_failures:
self.report_error(
- 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
+ f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
break
- playlist_results.append(entry_result)
- ie_result['entries'] = playlist_results
+ entries[i - 1] = (playlist_index, entry_result)
+
+ # Update with processed data
+ ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])
# Write the updated info to json
- if _infojson_written and self._write_info_json(
+ if _infojson_written is True and self._write_info_json(
'updated playlist', ie_result,
self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
return
- self.to_screen('[download] Finished downloading playlist: %s' % playlist)
+
+ ie_result = self.run_all_pps('playlist', ie_result)
+ self.to_screen(f'[download] Finished downloading playlist: {title}')
return ie_result
- @__handle_extraction_exceptions
+ @_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
return self.process_ie_result(
entry, download=download, extra_info=extra_info)
'^=': lambda attr, value: attr.startswith(value),
'$=': lambda attr, value: attr.endswith(value),
'*=': lambda attr, value: value in attr,
+ '~=': lambda attr, value: value.search(attr) is not None
}
str_operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-zA-Z0-9._-]+)\s*
- (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
- (?P<value>[a-zA-Z0-9._-]+)\s*
+ (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
+ (?P<quote>["'])?
+ (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
+ (?(quote)(?P=quote))\s*
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
m = str_operator_rex.fullmatch(filter_spec)
if m:
- comparison_value = m.group('value')
+ if m.group('op') == '~=':
+ comparison_value = re.compile(m.group('value'))
+ else:
+ comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
str_op = STR_OPERATORS[m.group('op')]
if m.group('negation'):
op = lambda attr, value: not str_op(attr, value)
temp_file.close()
try:
success, _ = self.dl(temp_file.name, f, test=True)
- except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
+ except (DownloadError, OSError, ValueError) + network_exceptions:
success = False
finally:
if os.path.exists(temp_file.name):
and download
and (
not can_merge()
- or info_dict.get('is_live', False)
+ or info_dict.get('is_live') and not self.params.get('live_from_start')
or self.outtmpl_dict['default'] == '-'))
compat = (
prefer_best
or self.params.get('allow_multiple_audio_streams', False)
- or 'format-spec' in self.params.get('compat_opts', []))
+ or 'format-spec' in self.params['compat_opts'])
return (
'best/bestvideo+bestaudio' if prefer_best
def syntax_error(note, start):
message = (
'Invalid format specification: '
- '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
+ '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
return SyntaxError(message)
PICKFIRST = 'PICKFIRST'
raise syntax_error('Expected a selector', start)
current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
else:
- raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
+ raise syntax_error(f'Operator not recognized: "{string}"', start)
elif type == tokenize.ENDMARKER:
break
if current_selector:
yield from _check_formats(ctx['formats'][::-1])
elif format_spec == 'mergeall':
def selector_function(ctx):
- formats = list(_check_formats(ctx['formats']))
+ formats = list(_check_formats(
+ f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
if not formats:
return
merged_format = formats[-1]
yield merged_format
else:
- format_fallback, format_reverse, format_idx = False, True, 1
+ format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
mobj = re.match(
r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
format_spec)
filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
elif format_spec in self._format_selection_exts['video']:
filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
+ seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
elif format_spec in self._format_selection_exts['storyboards']:
filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
else:
def selector_function(ctx):
formats = list(ctx['formats'])
matches = list(filter(filter_f, formats)) if filter_f is not None else formats
- if format_fallback and ctx['incomplete_formats'] and not matches:
- # for extractors with incomplete formats (audio only (soundcloud)
- # or video only (imgur)) best/worst will fallback to
- # best/worst {video,audio}-only format
- matches = formats
+ if not matches:
+ if format_fallback and ctx['incomplete_formats']:
+ # for extractors with incomplete formats (audio only (soundcloud)
+ # or video only (imgur)) best/worst will fallback to
+ # best/worst {video,audio}-only format
+ matches = formats
+ elif seperate_fallback and not ctx['has_merged_format']:
+ # for compatibility with youtube-dl when there is no pre-merged format
+ matches = list(filter(seperate_fallback, formats))
matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
try:
yield matches[format_idx - 1]
- except IndexError:
+ except LazyList.IndexError:
return
filters = [self._build_format_filter(f) for f in selector.filters]
return selector_function(ctx_copy)
return final_selector
- stream = io.BytesIO(format_spec.encode('utf-8'))
+ stream = io.BytesIO(format_spec.encode())
try:
- tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
+ tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
except tokenize.TokenError:
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
- class TokenIterator(object):
+ class TokenIterator:
def __init__(self, tokens):
self.tokens = tokens
self.counter = 0
return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
- res = std_headers.copy()
+ res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
- add_headers = info_dict.get('http_headers')
- if add_headers:
- res.update(add_headers)
-
- cookies = self._calc_cookies(info_dict)
+ cookies = self._calc_cookies(info_dict['url'])
if cookies:
res['Cookie'] = cookies
return res
- def _calc_cookies(self, info_dict):
- pr = sanitized_Request(info_dict['url'])
+ def _calc_cookies(self, url):
+ pr = sanitized_Request(url)
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')
else:
info_dict['thumbnails'] = thumbnails
+ def _fill_common_fields(self, info_dict, is_video=True):
+ # TODO: move sanitization here
+ if is_video:
+ # playlists are allowed to lack "title"
+ title = info_dict.get('title', NO_DEFAULT)
+ if title is NO_DEFAULT:
+ raise ExtractorError('Missing "title" field in extractor result',
+ video_id=info_dict['id'], ie=info_dict['extractor'])
+ info_dict['fulltitle'] = title
+ if not title:
+ if title == '':
+ self.write_debug('Extractor gave empty title. Creating a generic title')
+ else:
+ self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
+ info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
+
+ if info_dict.get('duration') is not None:
+ info_dict['duration_string'] = formatSeconds(info_dict['duration'])
+
+ for ts_key, date_key in (
+ ('timestamp', 'upload_date'),
+ ('release_timestamp', 'release_date'),
+ ('modified_timestamp', 'modified_date'),
+ ):
+ if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+ # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+ # see http://bugs.python.org/issue1646728)
+ with contextlib.suppress(ValueError, OverflowError, OSError):
+ upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+ info_dict[date_key] = upload_date.strftime('%Y%m%d')
+
+ live_keys = ('is_live', 'was_live')
+ live_status = info_dict.get('live_status')
+ if live_status is None:
+ for key in live_keys:
+ if info_dict.get(key) is False:
+ continue
+ if info_dict.get(key):
+ live_status = key
+ break
+ if all(info_dict.get(key) is False for key in live_keys):
+ live_status = 'not_live'
+ if live_status:
+ info_dict['live_status'] = live_status
+ for key in live_keys:
+ if info_dict.get(key) is None:
+ info_dict[key] = (live_status == key)
+
+ # Auto generate title fields corresponding to the *_number fields when missing
+ # in order to always have clean titles. This is very common for TV series.
+ for field in ('chapter', 'season', 'episode'):
+ if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+ info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
+ def _raise_pending_errors(self, info):
+ err = info.pop('__pending_error', None)
+ if err:
+ self.report_error(err, tb=False)
+
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
+ self._num_videos += 1
if 'id' not in info_dict:
- raise ExtractorError('Missing "id" field in extractor result')
- if 'title' not in info_dict:
- raise ExtractorError('Missing "title" field in extractor result',
- video_id=info_dict['id'], ie=info_dict['extractor'])
+ raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
+ elif not info_dict.get('id'):
+ raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
def report_force_conversion(field, field_not, conversion):
self.report_warning(
def sanitize_numeric_fields(info):
for numeric_field in self._NUMERIC_FIELDS:
field = info.get(numeric_field)
- if field is None or isinstance(field, compat_numeric_types):
+ if field is None or isinstance(field, (int, float)):
continue
report_force_conversion(numeric_field, 'numeric', 'int')
info[numeric_field] = int_or_none(field)
sanitize_string_field(info_dict, 'id')
sanitize_numeric_fields(info_dict)
+ if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
+ self.report_warning('"duration" field is negative, there is an error in extractor')
if 'playlist' not in info_dict:
# It isn't part of a playlist
if info_dict.get('display_id') is None and 'id' in info_dict:
info_dict['display_id'] = info_dict['id']
- if info_dict.get('duration') is not None:
- info_dict['duration_string'] = formatSeconds(info_dict['duration'])
-
- for ts_key, date_key in (
- ('timestamp', 'upload_date'),
- ('release_timestamp', 'release_date'),
- ):
- if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
- # Working around out-of-range timestamp values (e.g. negative ones on Windows,
- # see http://bugs.python.org/issue1646728)
- try:
- upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
- info_dict[date_key] = upload_date.strftime('%Y%m%d')
- except (ValueError, OverflowError, OSError):
- pass
-
- live_keys = ('is_live', 'was_live')
- live_status = info_dict.get('live_status')
- if live_status is None:
- for key in live_keys:
- if info_dict.get(key) is False:
- continue
- if info_dict.get(key):
- live_status = key
- break
- if all(info_dict.get(key) is False for key in live_keys):
- live_status = 'not_live'
- if live_status:
- info_dict['live_status'] = live_status
- for key in live_keys:
- if info_dict.get(key) is None:
- info_dict[key] = (live_status == key)
-
- # Auto generate title fields corresponding to the *_number fields when missing
- # in order to always have clean titles. This is very common for TV series.
- for field in ('chapter', 'season', 'episode'):
- if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
- info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+ self._fill_common_fields(info_dict)
for cc_kind in ('subtitles', 'automatic_captions'):
cc = info_dict.get(cc_kind)
else:
formats = info_dict['formats']
- info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
+ # or None ensures --clean-infojson removes it
+ info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
if not self.params.get('allow_unplayable_formats'):
formats = [f for f in formats if not f.get('has_drm')]
+ if info_dict['_has_drm'] and all(
+ f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
+ self.report_warning(
+ 'This video is DRM protected and only images are available for download. '
+ 'Use --list-formats to see them')
- if info_dict.get('is_live'):
- get_from_start = bool(self.params.get('live_from_start'))
+ get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
+ if not get_from_start:
+ info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+ if info_dict.get('is_live') and formats:
formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
+ if get_from_start and not formats:
+ self.raise_no_formats(info_dict, msg=(
+ '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
+ 'If you want to download from the current time, use --no-live-from-start'))
if not formats:
self.raise_no_formats(info_dict)
format['dynamic_range'] = 'SDR'
if (info_dict.get('duration') and format.get('tbr')
and not format.get('filesize') and not format.get('filesize_approx')):
- format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
+ format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
# Add HTTP headers, so that external programs can use them from the
# json output
if '__x_forwarded_for_ip' in info_dict:
del info_dict['__x_forwarded_for_ip']
- # TODO Central sorting goes here
-
if self.params.get('check_formats') is True:
formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
info_dict, _ = self.pre_process(info_dict)
+ if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
+ return info_dict
+
+ self.post_extract(info_dict)
+ info_dict, _ = self.pre_process(info_dict, 'after_filter')
+
# The pre-processors may have modified the formats
formats = info_dict.get('formats', [info_dict])
info_dict['id'], automatic_captions, 'automatic captions')
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
if self.params.get('listformats') or interactive_format_selection:
- if not info_dict.get('formats') and not info_dict.get('url'):
- self.to_screen('%s has no formats' % info_dict['id'])
- else:
- self.list_formats(info_dict)
+ self.list_formats(info_dict)
if list_only:
# Without this printing, -F --print-json will not work
self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
- return
+ return info_dict
format_selector = self.format_selector
if format_selector is None:
self.report_error(err, tb=False, is_error=False)
continue
- # While in format selection we may need to have an access to the original
- # format set in order to calculate some metrics or do some processing.
- # For now we need to be able to guess whether original formats provided
- # by extractor are incomplete or not (i.e. whether extractor provides only
- # video-only or audio-only formats) for proper formats selection for
- # extractors with such incomplete formats (see
- # https://github.com/ytdl-org/youtube-dl/pull/5556).
- # Since formats may be filtered during format selection and may not match
- # the original formats the results may be incorrect. Thus original formats
- # or pre-calculated metrics should be passed to format selection routines
- # as well.
- # We will pass a context object containing all necessary additional data
- # instead of just formats.
- # This fixes incorrect format selection issue (see
- # https://github.com/ytdl-org/youtube-dl/issues/10083).
- incomplete_formats = (
- # All formats are video-only or
- all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
- # all formats are audio-only
- or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
-
- ctx = {
+ formats_to_download = list(format_selector({
'formats': formats,
- 'incomplete_formats': incomplete_formats,
- }
-
- formats_to_download = list(format_selector(ctx))
+ 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
+ 'incomplete_formats': (
+ # All formats are video-only or
+ all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
+ # all formats are audio-only
+ or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
+ }))
if interactive_format_selection and not formats_to_download:
self.report_error('Requested format is not available', tb=False, is_error=False)
continue
if not formats_to_download:
if not self.params.get('ignore_no_formats_error'):
- raise ExtractorError('Requested format is not available', expected=True,
- video_id=info_dict['id'], ie=info_dict['extractor'])
- else:
- self.report_warning('Requested format is not available')
- # Process what we can, even without any available formats.
- self.process_info(dict(info_dict))
- elif download:
- self.to_screen(
- '[info] %s: Downloading %d format(s): %s' % (
- info_dict['id'], len(formats_to_download),
- ", ".join([f['format_id'] for f in formats_to_download])))
- for fmt in formats_to_download:
- new_info = dict(info_dict)
- # Save a reference to the original info_dict so that it can be modified in process_info if needed
- new_info['__original_infodict'] = info_dict
+ raise ExtractorError(
+ 'Requested format is not available. Use --list-formats for a list of available formats',
+ expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
+ self.report_warning('Requested format is not available')
+ # Process what we can, even without any available formats.
+ formats_to_download = [{}]
+
+ requested_ranges = self.params.get('download_ranges')
+ if requested_ranges:
+ requested_ranges = tuple(requested_ranges(info_dict, self))
+
+ best_format, downloaded_formats = formats_to_download[-1], []
+ if download:
+ if best_format:
+ def to_screen(*msg):
+ self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
+
+ to_screen(f'Downloading {len(formats_to_download)} format(s):',
+ (f['format_id'] for f in formats_to_download))
+ if requested_ranges:
+ to_screen(f'Downloading {len(requested_ranges)} time ranges:',
+ (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
+ max_downloads_reached = False
+
+ for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
+ new_info = self._copy_infodict(info_dict)
new_info.update(fmt)
- self.process_info(new_info)
+ if chapter:
+ new_info.update({
+ 'section_start': chapter.get('start_time'),
+ 'section_end': chapter.get('end_time', 0),
+ 'section_title': chapter.get('title'),
+ 'section_number': chapter.get('index'),
+ })
+ downloaded_formats.append(new_info)
+ try:
+ self.process_info(new_info)
+ except MaxDownloadsReached:
+ max_downloads_reached = True
+ self._raise_pending_errors(new_info)
+ # Remove copied info
+ for key, val in tuple(new_info.items()):
+ if info_dict.get(key) == val:
+ new_info.pop(key)
+ if max_downloads_reached:
+ break
+
+ write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
+ assert write_archive.issubset({True, False, 'ignore'})
+ if True in write_archive and False not in write_archive:
+ self.record_download_archive(info_dict)
+
+ info_dict['requested_downloads'] = downloaded_formats
+ info_dict = self.run_all_pps('after_video', info_dict)
+ if max_downloads_reached:
+ raise MaxDownloadsReached()
+
# We update the info dict with the selected best quality format (backwards compatibility)
- if formats_to_download:
- info_dict.update(formats_to_download[-1])
+ info_dict.update(best_format)
return info_dict
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
"""Select the requested subtitles and their format"""
- available_subs = {}
+ available_subs, normal_sub_langs = {}, []
if normal_subtitles and self.params.get('writesubtitles'):
available_subs.update(normal_subtitles)
+ normal_sub_langs = tuple(normal_subtitles.keys())
if automatic_captions and self.params.get('writeautomaticsub'):
for lang, cap_info in automatic_captions.items():
if lang not in available_subs:
available_subs):
return None
- all_sub_langs = available_subs.keys()
+ all_sub_langs = tuple(available_subs.keys())
if self.params.get('allsubtitles', False):
requested_langs = all_sub_langs
elif self.params.get('subtitleslangs', False):
# given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
requested_langs = []
for lang_re in self.params.get('subtitleslangs'):
- if lang_re == 'all':
- requested_langs.extend(all_sub_langs)
- continue
discard = lang_re[0] == '-'
if discard:
lang_re = lang_re[1:]
+ if lang_re == 'all':
+ if discard:
+ requested_langs = []
+ else:
+ requested_langs.extend(all_sub_langs)
+ continue
current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
if discard:
for lang in current_langs:
else:
requested_langs.extend(current_langs)
requested_langs = orderedSet(requested_langs)
- elif 'en' in available_subs:
- requested_langs = ['en']
+ elif normal_sub_langs:
+ requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
else:
- requested_langs = [list(all_sub_langs)[0]]
+ requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
if requested_langs:
self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
for lang in requested_langs:
formats = available_subs.get(lang)
if formats is None:
- self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+ self.report_warning(f'{lang} subtitles not available for {video_id}')
continue
for ext in formats_preference:
if ext == 'best':
subs[lang] = f
return subs
+ def _forceprint(self, key, info_dict):
+ if info_dict is None:
+ return
+ info_copy = info_dict.copy()
+ info_copy['formats_table'] = self.render_formats_table(info_dict)
+ info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
+ info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
+ info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
+
+ def format_tmpl(tmpl):
+ mobj = re.match(r'\w+(=?)$', tmpl)
+ if mobj and mobj.group(1):
+ return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
+ elif mobj:
+ return f'%({tmpl})s'
+ return tmpl
+
+ for tmpl in self.params['forceprint'].get(key, []):
+ self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
+
+ for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
+ filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
+ tmpl = format_tmpl(tmpl)
+ self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
+ if self._ensure_dir_exists(filename):
+ with open(filename, 'a', encoding='utf-8') as f:
+ f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
+
def __forced_printings(self, info_dict, filename, incomplete):
def print_mandatory(field, actual_field=None):
if actual_field is None:
if info_dict.get('requested_formats') is not None:
# For RTMP URLs, also include the playpath
info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
- elif 'url' in info_dict:
+ elif info_dict.get('url'):
info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
- if self.params.get('forceprint') or self.params.get('forcejson'):
+ if (self.params.get('forcejson')
+ or self.params['forceprint'].get('video')
+ or self.params['print_to_file'].get('video')):
self.post_extract(info_dict)
- for tmpl in self.params.get('forceprint', []):
- mobj = re.match(r'\w+(=?)$', tmpl)
- if mobj and mobj.group(1):
- tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
- elif mobj:
- tmpl = '%({})s'.format(tmpl)
- self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
+ self._forceprint('video', info_dict)
print_mandatory('title')
print_mandatory('id')
if not test:
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
- urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
- self.write_debug('Invoking downloader on "%s"' % urls)
+ urls = '", "'.join(
+ (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
+ for f in info.get('requested_formats', []) or [info])
+ self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
# Note: Ideally info should be a deep-copied so that hooks cannot modify it.
# But it may contain objects that are not deep-copyable
new_info['http_headers'] = self._calc_headers(new_info)
return fd.download(name, new_info, subtitle)
- def process_info(self, info_dict):
- """Process a single resolved IE result."""
+ def existing_file(self, filepaths, *, default_overwrite=True):
+ existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
+ if existing_files and not self.params.get('overwrites', default_overwrite):
+ return existing_files[0]
- assert info_dict.get('_type', 'video') == 'video'
-
- max_downloads = self.params.get('max_downloads')
- if max_downloads is not None:
- if self._num_downloads >= int(max_downloads):
- raise MaxDownloadsReached()
+ for file in existing_files:
+ self.report_file_delete(file)
+ os.remove(file)
+ return None
- if info_dict.get('is_live') and not self.params.get('live_from_start'):
- info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+ def process_info(self, info_dict):
+ """Process a single resolved IE result. (Modifies it in-place)"""
- # TODO: backward compatibility, to be removed
- info_dict['fulltitle'] = info_dict['title']
+ assert info_dict.get('_type', 'video') == 'video'
+ original_infodict = info_dict
if 'format' not in info_dict and 'ext' in info_dict:
info_dict['format'] = info_dict['ext']
+ # This is mostly just for backward compatibility of process_info
+ # As a side-effect, this allows for format-specific filters
if self._match_entry(info_dict) is not None:
+ info_dict['__write_download_archive'] = 'ignore'
return
+ # Does nothing under normal operation - for backward compatibility of process_info
self.post_extract(info_dict)
self._num_downloads += 1
# Forced printings
self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
+ def check_max_downloads():
+ if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
+ raise MaxDownloadsReached()
+
if self.params.get('simulate'):
- if self.params.get('force_write_download_archive', False):
- self.record_download_archive(info_dict)
- # Do nothing else if in simulate mode
+ info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
+ check_max_downloads()
return
if full_filename is None:
else:
try:
self.to_screen('[info] Writing video annotations to: ' + annofn)
- with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+ with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
annofile.write(info_dict['annotations'])
except (KeyError, TypeError):
self.report_warning('There are no annotations to write.')
- except (OSError, IOError):
+ except OSError:
self.report_error('Cannot write annotations file: ' + annofn)
return
# Write internet shortcut files
def _write_link_file(link_type):
- if 'webpage_url' not in info_dict:
- self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
- return False
+ url = try_get(info_dict['webpage_url'], iri_to_uri)
+ if not url:
+ self.report_warning(
+ f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
+ return True
linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
if not self._ensure_dir_exists(encodeFilename(linkfn)):
return False
return True
try:
self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
- with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
- newline='\r\n' if link_type == 'url' else '\n') as linkfile:
- template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
+ with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
+ newline='\r\n' if link_type == 'url' else '\n') as linkfile:
+ template_vars = {'url': url}
if link_type == 'desktop':
template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
- except (OSError, IOError):
+ except OSError:
self.report_error(f'Cannot write internet shortcut {linkfn}')
return False
return True
for link_type, should_write in write_links.items()):
return
- try:
- info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
- except PostProcessingError as err:
- self.report_error('Preprocessing: %s' % str(err))
- return
+ def replace_info_dict(new_info):
+ nonlocal info_dict
+ if new_info == info_dict:
+ return
+ info_dict.clear()
+ info_dict.update(new_info)
+
+ new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
+ replace_info_dict(new_info)
- must_record_download_archive = False
- if self.params.get('skip_download', False):
+ if self.params.get('skip_download'):
info_dict['filepath'] = temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
info_dict['__files_to_move'] = files_to_move
- info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
+ replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
+ info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
else:
# Download
info_dict.setdefault('__postprocessors', [])
try:
- def existing_file(*filepaths):
+ def existing_video_file(*filepaths):
ext = info_dict.get('ext')
- final_ext = self.params.get('final_ext', ext)
- existing_files = []
- for file in orderedSet(filepaths):
- if final_ext != ext:
- converted = replace_extension(file, final_ext, ext)
- if os.path.exists(encodeFilename(converted)):
- existing_files.append(converted)
- if os.path.exists(encodeFilename(file)):
- existing_files.append(file)
-
- if not existing_files or self.params.get('overwrites', False):
- for file in orderedSet(existing_files):
- self.report_file_delete(file)
- os.remove(encodeFilename(file))
- return None
-
- info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
- return existing_files[0]
+ converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
+ file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
+ default_overwrite=False)
+ if file:
+ info_dict['ext'] = os.path.splitext(file)[1][1:]
+ return file
success = True
+ merger, fd = FFmpegMergerPP(self), None
+ if info_dict.get('protocol') or info_dict.get('url'):
+ fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
+ if fd is not FFmpegFD and (
+ info_dict.get('section_start') or info_dict.get('section_end')):
+ msg = ('This format cannot be partially downloaded' if merger.available
+ else 'You have requested downloading the video partially, but ffmpeg is not installed')
+ self.report_error(f'{msg}. Aborting')
+ return
+
if info_dict.get('requested_formats') is not None:
def compatible_formats(formats):
return False
# Check extension
- exts = set(format.get('ext') for format in formats)
+ exts = {format.get('ext') for format in formats}
COMPATIBLE_EXTS = (
- set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
- set(('webm',)),
+ {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
+ {'webm'},
)
for ext_sets in COMPATIBLE_EXTS:
if ext_sets.issuperset(exts):
and info_dict.get('thumbnails')
# check with type instead of pp_key, __name__, or isinstance
# since we dont want any custom PPs to trigger this
- and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
+ and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
info_dict['ext'] = 'mkv'
self.report_warning(
'webm doesn\'t support embedding a thumbnail, mkv will be used')
os.path.splitext(filename)[0]
if filename_real_ext in (old_ext, new_ext)
else filename)
- return '%s.%s' % (filename_wo_ext, ext)
+ return f'{filename_wo_ext}.{ext}'
# Ensure filename always has a correct extension for successful merge
full_filename = correct_ext(full_filename)
temp_filename = correct_ext(temp_filename)
- dl_filename = existing_file(full_filename, temp_filename)
+ dl_filename = existing_video_file(full_filename, temp_filename)
info_dict['__real_download'] = False
downloaded = []
- merger = FFmpegMergerPP(self)
-
- fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
if dl_filename is not None:
self.report_file_already_downloaded(dl_filename)
elif fd:
'while also allowing unplayable formats to be downloaded. '
'The formats won\'t be merged to prevent data corruption.')
elif not merger.available:
- self.report_warning(
- 'You have requested merging of multiple formats but ffmpeg is not installed. '
- 'The formats won\'t be merged.')
+ msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
+ if not self.params.get('ignoreerrors'):
+ self.report_error(f'{msg}. Aborting due to --abort-on-error')
+ return
+ self.report_warning(f'{msg}. The formats won\'t be merged')
if temp_filename == '-':
reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
files_to_move[file] = None
else:
# Just a single file
- dl_filename = existing_file(full_filename, temp_filename)
+ dl_filename = existing_video_file(full_filename, temp_filename)
if dl_filename is None or dl_filename == temp_filename:
# dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
# So we should try to resume the download
except network_exceptions as err:
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
return
- except (OSError, IOError) as err:
+ except OSError as err:
raise UnavailableVideoError(err)
except (ContentTooShortError, ) as err:
- self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+ self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
return
+ self._raise_pending_errors(info_dict)
if success and full_filename != '-':
def fixup():
if fixup_policy in ('ignore', 'never'):
return
elif fixup_policy == 'warn':
- do_fixup = False
+ do_fixup = 'warn'
elif fixup_policy != 'force':
assert fixup_policy in ('detect_or_warn', None)
if not info_dict.get('__real_download'):
do_fixup = False
def ffmpeg_fixup(cndn, msg, cls):
- if not cndn:
+ if not (do_fixup and cndn):
return
- if not do_fixup:
+ elif do_fixup == 'warn':
self.report_warning(f'{vid}: {msg}')
return
pp = cls(self)
FFmpegFixupM4aPP)
downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
- downloader = downloader.__name__ if downloader else None
+ downloader = downloader.FD_NAME if downloader else None
if info_dict.get('requested_formats') is None: # Not necessary if doing merger
- ffmpeg_fixup(downloader == 'HlsFD',
+ ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
+ or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
FFmpegFixupM3u8PP)
ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
- ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
- ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
+ ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
+ ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
fixup()
try:
- info_dict = self.post_process(dl_filename, info_dict, files_to_move)
+ replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
except PostProcessingError as err:
self.report_error('Postprocessing: %s' % str(err))
return
except Exception as err:
self.report_error('post hooks: %s' % str(err))
return
- must_record_download_archive = True
+ info_dict['__write_download_archive'] = True
- if must_record_download_archive or self.params.get('force_write_download_archive', False):
- self.record_download_archive(info_dict)
- max_downloads = self.params.get('max_downloads')
- if max_downloads is not None and self._num_downloads >= int(max_downloads):
- raise MaxDownloadsReached()
+ assert info_dict is original_infodict # Make sure the info_dict was modified in-place
+ if self.params.get('force_write_download_archive'):
+ info_dict['__write_download_archive'] = True
+ check_max_downloads()
def __download_wrapper(self, func):
@functools.wraps(func)
res = func(*args, **kwargs)
except UnavailableVideoError as e:
self.report_error(e)
- except MaxDownloadsReached as e:
- self.to_screen(f'[info] {e}')
- raise
except DownloadCancelled as e:
self.to_screen(f'[info] {e}')
if not self.params.get('break_per_url'):
if info_dict is None:
return info_dict
info_dict.setdefault('epoch', int(time.time()))
- remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
- keep_keys = ['_type'] # Always keep this to facilitate load-info-json
+ info_dict.setdefault('_type', 'video')
+
if remove_private_keys:
- remove_keys |= {
- 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
- 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
+ reject = lambda k, v: v is None or k.startswith('__') or k in {
+ 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
+ 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
}
- empty_values = (None, {}, [], set(), tuple())
- reject = lambda k, v: k not in keep_keys and (
- k.startswith('_') or k in remove_keys or v in empty_values)
else:
- reject = lambda k, v: k in remove_keys
+ reject = lambda k, v: False
def filter_fn(obj):
if isinstance(obj, dict):
''' Alias of sanitize_info for backward compatibility '''
return YoutubeDL.sanitize_info(info_dict, actually_filter)
+ def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
+ for filename in set(filter(None, files_to_delete)):
+ if msg:
+ self.to_screen(msg % filename)
+ try:
+ os.remove(filename)
+ except OSError:
+ self.report_warning(f'Unable to delete file {filename}')
+ if filename in info.get('__files_to_move', []): # NB: Delete even if None
+ del info['__files_to_move'][filename]
+
+ @staticmethod
+ def post_extract(info_dict):
+ def actual_post_extract(info_dict):
+ if info_dict.get('_type') in ('playlist', 'multi_video'):
+ for video_dict in info_dict.get('entries', {}):
+ actual_post_extract(video_dict or {})
+ return
+
+ post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
+ info_dict.update(post_extractor())
+
+ actual_post_extract(info_dict or {})
+
def run_pp(self, pp, infodict):
files_to_delete = []
if '__files_to_move' not in infodict:
for f in files_to_delete:
infodict['__files_to_move'].setdefault(f, '')
else:
- for old_filename in set(files_to_delete):
- self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
- try:
- os.remove(encodeFilename(old_filename))
- except (IOError, OSError):
- self.report_warning('Unable to remove downloaded original file')
- if old_filename in infodict['__files_to_move']:
- del infodict['__files_to_move'][old_filename]
+ self._delete_downloaded_files(
+ *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
return infodict
- @staticmethod
- def post_extract(info_dict):
- def actual_post_extract(info_dict):
- if info_dict.get('_type') in ('playlist', 'multi_video'):
- for video_dict in info_dict.get('entries', {}):
- actual_post_extract(video_dict or {})
- return
-
- post_extractor = info_dict.get('__post_extractor') or (lambda: {})
- extra = post_extractor().items()
- info_dict.update(extra)
- info_dict.pop('__post_extractor', None)
-
- original_infodict = info_dict.get('__original_infodict') or {}
- original_infodict.update(extra)
- original_infodict.pop('__post_extractor', None)
-
- actual_post_extract(info_dict or {})
+ def run_all_pps(self, key, info, *, additional_pps=None):
+ self._forceprint(key, info)
+ for pp in (additional_pps or []) + self._pps[key]:
+ info = self.run_pp(pp, info)
+ return info
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
info = dict(ie_info)
info['__files_to_move'] = files_to_move or {}
- for pp in self._pps[key]:
- info = self.run_pp(pp, info)
+ try:
+ info = self.run_all_pps(key, info)
+ except PostProcessingError as err:
+ msg = f'Preprocessing: {err}'
+ info.setdefault('__pending_error', msg)
+ self.report_error(msg, is_error=False)
return info, info.pop('__files_to_move', None)
- def post_process(self, filename, ie_info, files_to_move=None):
+ def post_process(self, filename, info, files_to_move=None):
"""Run all the postprocessors on the given file."""
- info = dict(ie_info)
info['filepath'] = filename
info['__files_to_move'] = files_to_move or {}
-
- for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
- info = self.run_pp(pp, info)
+ info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
del info['__files_to_move']
- for pp in self._pps['after_move']:
- info = self.run_pp(pp, info)
- return info
+ return self.run_all_pps('after_move', info)
def _make_archive_id(self, info_dict):
video_id = info_dict.get('id')
break
else:
return
- return '%s %s' % (extractor.lower(), video_id)
+ return f'{extractor.lower()} {video_id}'
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
return
vid_id = self._make_archive_id(info_dict)
assert vid_id
+ self.write_debug(f'Adding to archive: {vid_id}')
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
archive_file.write(vid_id + '\n')
self.archive.add(vid_id)
return '%dx?' % format['width']
return default
+ def _list_format_headers(self, *headers):
+ if self.params.get('listformats_table', True) is not False:
+ return [self._format_out(header, self.Styles.HEADERS) for header in headers]
+ return headers
+
def _format_note(self, fdict):
res = ''
if fdict.get('ext') in ['f4f', 'f4m']:
res += '~' + format_bytes(fdict['filesize_approx'])
return res
- def _list_format_headers(self, *headers):
- if self.params.get('listformats_table', True) is not False:
- return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
- return headers
+ def render_formats_table(self, info_dict):
+ if not info_dict.get('formats') and not info_dict.get('url'):
+ return None
- def list_formats(self, info_dict):
formats = info_dict.get('formats', [info_dict])
- new_format = self.params.get('listformats_table', True) is not False
- if new_format:
- delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
- table = [
- [
- self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
- format_field(f, 'ext'),
- format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
- format_field(f, 'fps', '\t%d'),
- format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
- delim,
- format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
- format_field(f, 'tbr', '\t%dk'),
- shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
- delim,
- format_field(f, 'vcodec', default='unknown').replace(
- 'none',
- 'images' if f.get('acodec') == 'none'
- else self._format_screen('audio only', self.Styles.SUPPRESS)),
- format_field(f, 'vbr', '\t%dk'),
- format_field(f, 'acodec', default='unknown').replace(
- 'none',
- '' if f.get('vcodec') == 'none'
- else self._format_screen('video only', self.Styles.SUPPRESS)),
- format_field(f, 'abr', '\t%dk'),
- format_field(f, 'asr', '\t%dHz'),
- join_nonempty(
- self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
- format_field(f, 'language', '[%s]'),
- join_nonempty(
- format_field(f, 'format_note'),
- format_field(f, 'container', ignore=(None, f.get('ext'))),
- delim=', '),
- delim=' '),
- ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
- header_line = self._list_format_headers(
- 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
- delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
- else:
+ if not self.params.get('listformats_table', True) is not False:
table = [
[
format_field(f, 'format_id'),
format_field(f, 'ext'),
self.format_resolution(f),
- self._format_note(f)]
- for f in formats
- if f.get('preference') is None or f['preference'] >= -1000]
- header_line = ['format code', 'extension', 'resolution', 'note']
-
- self.to_screen(
- '[info] Available formats for %s:' % info_dict['id'])
- self.to_stdout(render_table(
- header_line, table,
- extra_gap=(0 if new_format else 1),
- hide_empty=new_format,
- delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
-
- def list_thumbnails(self, info_dict):
- thumbnails = list(info_dict.get('thumbnails'))
+ self._format_note(f)
+ ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
+ return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
+
+ delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
+ table = [
+ [
+ self._format_out(format_field(f, 'format_id'), self.Styles.ID),
+ format_field(f, 'ext'),
+ format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
+ format_field(f, 'fps', '\t%d'),
+ format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
+ delim,
+ format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
+ format_field(f, 'tbr', '\t%dk'),
+ shorten_protocol_name(f.get('protocol', '')),
+ delim,
+ format_field(f, 'vcodec', default='unknown').replace(
+ 'none', 'images' if f.get('acodec') == 'none'
+ else self._format_out('audio only', self.Styles.SUPPRESS)),
+ format_field(f, 'vbr', '\t%dk'),
+ format_field(f, 'acodec', default='unknown').replace(
+ 'none', '' if f.get('vcodec') == 'none'
+ else self._format_out('video only', self.Styles.SUPPRESS)),
+ format_field(f, 'abr', '\t%dk'),
+ format_field(f, 'asr', '\t%dHz'),
+ join_nonempty(
+ self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
+ format_field(f, 'language', '[%s]'),
+ join_nonempty(format_field(f, 'format_note'),
+ format_field(f, 'container', ignore=(None, f.get('ext'))),
+ delim=', '),
+ delim=' '),
+ ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
+ header_line = self._list_format_headers(
+ 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
+ delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
+
+ return render_table(
+ header_line, table, hide_empty=True,
+ delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
+
+ def render_thumbnails_table(self, info_dict):
+ thumbnails = list(info_dict.get('thumbnails') or [])
if not thumbnails:
- self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
- return
-
- self.to_screen(
- '[info] Thumbnails for %s:' % info_dict['id'])
- self.to_stdout(render_table(
+ return None
+ return render_table(
self._list_format_headers('ID', 'Width', 'Height', 'URL'),
- [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
-
- def list_subtitles(self, video_id, subtitles, name='subtitles'):
- if not subtitles:
- self.to_screen('%s has no %s' % (video_id, name))
- return
- self.to_screen(
- 'Available %s for %s:' % (name, video_id))
+ [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
+ def render_subtitles_table(self, video_id, subtitles):
def _row(lang, formats):
exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
if len(set(names)) == 1:
names = [] if names[0] == 'unknown' else names[:1]
return [lang, ', '.join(names), ', '.join(exts)]
- self.to_stdout(render_table(
+ if not subtitles:
+ return None
+ return render_table(
self._list_format_headers('Language', 'Name', 'Formats'),
[_row(lang, formats) for lang, formats in subtitles.items()],
- hide_empty=True))
+ hide_empty=True)
+
+ def __list_table(self, video_id, name, func, *args):
+ table = func(*args)
+ if not table:
+ self.to_screen(f'{video_id} has no {name}')
+ return
+ self.to_screen(f'[info] Available {name} for {video_id}:')
+ self.to_stdout(table)
+
+ def list_formats(self, info_dict):
+ self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
+
+ def list_thumbnails(self, info_dict):
+ self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
+
+ def list_subtitles(self, video_id, subtitles, name='subtitles'):
+ self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
def urlopen(self, req):
""" Start an HTTP download """
- if isinstance(req, compat_basestring):
+ if isinstance(req, str):
req = sanitized_Request(req)
return self._opener.open(req, timeout=self._socket_timeout)
if not self.params.get('verbose'):
return
+ # These imports can be slow. So import them only as needed
+ from .extractor.extractors import _LAZY_LOADER
+ from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
+
def get_encoding(stream):
- ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
+ ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
if not supports_terminal_sequences(stream):
- from .compat import WINDOWS_VT_MODE
+ from .utils import WINDOWS_VT_MODE # Must be imported locally
ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
return ret
- encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
+ encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
locale.getpreferredencoding(),
sys.getfilesystemencoding(),
- get_encoding(self._screen_file), get_encoding(self._err_file),
- self.get_encoding())
+ self.get_encoding(),
+ ', '.join(
+ f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
+ if stream is not None and key != 'console')
+ )
logger = self.params.get('logger')
if logger:
write_debug('Plugins: %s' % [
'%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
- if self.params.get('compat_opts'):
- write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
+ if self.params['compat_opts']:
+ write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
if source == 'source':
try:
- sp = Popen(
+ stdout, _, _ = Popen.run(
['git', 'rev-parse', '--short', 'HEAD'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate_or_kill()
- out = out.decode().strip()
- if re.match('[0-9a-f]+', out):
- write_debug('Git HEAD: %s' % out)
+ text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ if re.fullmatch('[0-9a-f]+', stdout.strip()):
+ write_debug(f'Git HEAD: {stdout.strip()}')
except Exception:
- try:
+ with contextlib.suppress(Exception):
sys.exc_clear()
- except Exception:
- pass
def python_implementation():
impl_name = platform.python_implementation()
exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
if ffmpeg_features:
- exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
+ exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
exe_versions['rtmpdump'] = rtmpdump_version()
exe_versions['phantomjs'] = PhantomJSwrapper._version()
) or 'none'
write_debug('exe versions: %s' % exe_str)
- from .downloader.websocket import has_websockets
- from .postprocessor.embedthumbnail import has_mutagen
- from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
+ from .compat.compat_utils import get_package_info
+ from .dependencies import available_dependencies
- lib_str = join_nonempty(
- compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
- KEYRING_AVAILABLE and 'keyring',
- has_mutagen and 'mutagen',
- SQLITE_AVAILABLE and 'sqlite',
- has_websockets and 'websockets',
- delim=', ') or 'none'
- write_debug('Optional libraries: %s' % lib_str)
+ write_debug('Optional libraries: %s' % (', '.join(sorted({
+ join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
+ })) or 'none'))
+ self._setup_opener()
proxy_map = {}
for handler in self._opener.handlers:
if hasattr(handler, 'proxies'):
# Not implemented
if False and self.params.get('call_home'):
- ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
+ ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
write_debug('Public IP address: %s' % ipaddr)
latest_version = self.urlopen(
- 'https://yt-dl.org/latest/version').read().decode('utf-8')
+ 'https://yt-dl.org/latest/version').read().decode()
if version_tuple(latest_version) > version_tuple(__version__):
self.report_warning(
'You are using an outdated version (newest version: %s)! '
latest_version)
def _setup_opener(self):
+ if hasattr(self, '_opener'):
+ return
timeout_val = self.params.get('socket_timeout')
self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
redirect_handler = YoutubeDLRedirectHandler()
- data_handler = compat_urllib_request_DataHandler()
+ data_handler = urllib.request.DataHandler()
# When passing our own FileHandler instance, build_opener won't add the
# default FileHandler and allows us to disable the file protocol, which
return encoding
def _write_info_json(self, label, ie_result, infofn, overwrite=None):
- ''' Write infojson and returns True = written, False = skip, None = error '''
+ ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
if overwrite is None:
overwrite = self.params.get('overwrites', True)
if not self.params.get('writeinfojson'):
return None
elif not overwrite and os.path.exists(infofn):
self.to_screen(f'[info] {label.title()} metadata is already present')
- else:
- self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
- try:
- write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
- except (OSError, IOError):
- self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
- return None
- return True
+ return 'exists'
+
+ self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
+ try:
+ write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
+ return True
+ except OSError:
+ self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
+ return None
def _write_description(self, label, ie_result, descfn):
''' Write description and returns True = written, False = skip, None = error '''
else:
try:
self.to_screen(f'[info] Writing {label} description to: {descfn}')
- with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(ie_result['description'])
- except (OSError, IOError):
+ except OSError:
self.report_error(f'Cannot write {label} description file {descfn}')
return None
return True
sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
- if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
+ existing_sub = self.existing_file((sub_filename_final, sub_filename))
+ if existing_sub:
self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
- sub_info['filepath'] = sub_filename
- ret.append((sub_filename, sub_filename_final))
+ sub_info['filepath'] = existing_sub
+ ret.append((existing_sub, sub_filename_final))
continue
self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
try:
# Use newline='' to prevent conversion of newline characters
# See https://github.com/ytdl-org/youtube-dl/issues/10268
- with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
+ with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
sub_info['filepath'] = sub_filename
ret.append((sub_filename, sub_filename_final))
continue
- except (OSError, IOError):
+ except OSError:
self.report_error(f'Cannot write video subtitles file {sub_filename}')
return None
self.dl(sub_filename, sub_copy, subtitle=True)
sub_info['filepath'] = sub_filename
ret.append((sub_filename, sub_filename_final))
- except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
- self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
- continue
+ except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
+ msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
+ if self.params.get('ignoreerrors') is not True: # False or 'only_download'
+ if not self.params.get('ignoreerrors'):
+ self.report_error(msg)
+ raise DownloadError(msg)
+ self.report_warning(msg)
return ret
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
- if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
- ret.append((thumb_filename, thumb_filename_final))
- t['filepath'] = thumb_filename
+ existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
+ if existing_thumb:
self.to_screen('[info] %s is already present' % (
thumb_display_id if multiple else f'{label} thumbnail').capitalize())
+ t['filepath'] = existing_thumb
+ ret.append((existing_thumb, thumb_filename_final))
else:
self.to_screen(f'[info] Downloading {thumb_display_id} ...')
try:
- uf = self.urlopen(t['url'])
+ uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)