#!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import absolute_import, unicode_literals
-
import collections
import contextlib
-import copy
import datetime
import errno
import fileinput
import operator
import os
import platform
+import random
import re
import shutil
import subprocess
import time
import tokenize
import traceback
-import random
import unicodedata
-
-from enum import Enum
+import urllib.request
from string import ascii_letters
+from .cache import Cache
from .compat import (
- compat_basestring,
+ HAS_LEGACY as compat_has_legacy,
compat_get_terminal_size,
- compat_kwargs,
- compat_numeric_types,
compat_os_name,
- compat_pycrypto_AES,
compat_shlex_quote,
compat_str,
- compat_tokenize_tokenize,
compat_urllib_error,
compat_urllib_request,
- compat_urllib_request_DataHandler,
- windows_enable_vt_mode,
)
from .cookies import load_cookies
+from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
+from .downloader.rtmp import rtmpdump_version
+from .extractor import gen_extractor_classes, get_info_extractor
+from .extractor.openload import PhantomJSwrapper
+from .minicurses import format_text
+from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
+from .postprocessor import (
+ EmbedThumbnailPP,
+ FFmpegFixupDuplicateMoovPP,
+ FFmpegFixupDurationPP,
+ FFmpegFixupM3u8PP,
+ FFmpegFixupM4aPP,
+ FFmpegFixupStretchedPP,
+ FFmpegFixupTimestampPP,
+ FFmpegMergerPP,
+ FFmpegPostProcessor,
+ MoveFilesAfterDownloadPP,
+ get_postprocessor,
+)
+from .update import detect_variant
from .utils import (
+ DEFAULT_OUTTMPL,
+ LINK_TEMPLATES,
+ NO_DEFAULT,
+ NUMBER_RE,
+ OUTTMPL_TYPES,
+ POSTPROCESS_WHEN,
+ STR_FORMAT_RE_TMPL,
+ STR_FORMAT_TYPES,
+ ContentTooShortError,
+ DateRange,
+ DownloadCancelled,
+ DownloadError,
+ EntryNotInPlaylist,
+ ExistingVideoReached,
+ ExtractorError,
+ GeoRestrictedError,
+ HEADRequest,
+ ISO3166Utils,
+ LazyList,
+ MaxDownloadsReached,
+ Namespace,
+ PagedList,
+ PerRequestProxyHandler,
+ PlaylistEntries,
+ Popen,
+ PostProcessingError,
+ ReExtractInfo,
+ RejectedVideoReached,
+ SameFileError,
+ UnavailableVideoError,
+ YoutubeDLCookieProcessor,
+ YoutubeDLHandler,
+ YoutubeDLRedirectHandler,
age_restricted,
args_to_str,
- ContentTooShortError,
date_from_str,
- DateRange,
- DEFAULT_OUTTMPL,
determine_ext,
determine_protocol,
- DownloadError,
encode_compat_str,
encodeFilename,
- EntryNotInPlaylist,
error_to_compat_str,
- ExistingVideoReached,
expand_path,
- ExtractorError,
+ filter_dict,
float_or_none,
format_bytes,
+ format_decimal_suffix,
format_field,
formatSeconds,
- GeoRestrictedError,
- HEADRequest,
+ get_domain,
int_or_none,
iri_to_uri,
- ISO3166Utils,
- LazyList,
- LINK_TEMPLATES,
+ join_nonempty,
locked_file,
make_dir,
make_HTTPS_handler,
- MaxDownloadsReached,
+ merge_headers,
network_exceptions,
number_of_digits,
orderedSet,
- OUTTMPL_TYPES,
- PagedList,
parse_filesize,
- PerRequestProxyHandler,
platform_name,
- Popen,
- PostProcessingError,
preferredencoding,
prepend_extension,
register_socks_protocols,
- RejectedVideoReached,
+ remove_terminal_sequences,
render_table,
replace_extension,
- SameFileError,
sanitize_filename,
sanitize_path,
sanitize_url,
sanitized_Request,
std_headers,
- STR_FORMAT_RE_TMPL,
- STR_FORMAT_TYPES,
str_or_none,
strftime_or_none,
subtitles_filename,
supports_terminal_sequences,
- ThrottledDownload,
+ timetuple_from_msec,
to_high_limit_path,
traverse_obj,
try_get,
- UnavailableVideoError,
url_basename,
variadic,
version_tuple,
+ windows_enable_vt_mode,
write_json_file,
write_string,
- YoutubeDLCookieProcessor,
- YoutubeDLHandler,
- YoutubeDLRedirectHandler,
-)
-from .cache import Cache
-from .minicurses import format_text
-from .extractor import (
- gen_extractor_classes,
- get_info_extractor,
- _LAZY_LOADER,
- _PLUGIN_CLASSES as plugin_extractors
-)
-from .extractor.openload import PhantomJSwrapper
-from .downloader import (
- FFmpegFD,
- get_suitable_downloader,
- shorten_protocol_name
-)
-from .downloader.rtmp import rtmpdump_version
-from .postprocessor import (
- get_postprocessor,
- EmbedThumbnailPP,
- FFmpegFixupDurationPP,
- FFmpegFixupM3u8PP,
- FFmpegFixupM4aPP,
- FFmpegFixupStretchedPP,
- FFmpegFixupTimestampPP,
- FFmpegMergerPP,
- FFmpegPostProcessor,
- MoveFilesAfterDownloadPP,
- _PLUGIN_CLASSES as plugin_postprocessors
)
-from .update import detect_variant
-from .version import __version__
+from .version import RELEASE_GIT_HEAD, __version__
if compat_os_name == 'nt':
import ctypes
-class YoutubeDL(object):
+class YoutubeDL:
"""YoutubeDL class.
YoutubeDL objects are the ones responsible for downloading the
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
no_warnings: Do not print out anything for warnings.
- forceprint: A list of templates to force print
- forceurl: Force printing final URL. (Deprecated)
- forcetitle: Force printing title. (Deprecated)
- forceid: Force printing ID. (Deprecated)
- forcethumbnail: Force printing thumbnail URL. (Deprecated)
- forcedescription: Force printing description. (Deprecated)
- forcefilename: Force printing final filename. (Deprecated)
- forceduration: Force printing duration. (Deprecated)
+ forceprint: A dict with keys WHEN mapped to a list of templates to
+ print to stdout. The allowed keys are video or any of the
+ items in utils.POSTPROCESS_WHEN.
+ For compatibility, a single list is also accepted
+ print_to_file: A dict with keys WHEN (same as forceprint) mapped to
+ a list of tuples with (template, filename)
forcejson: Force printing info_dict as JSON.
dump_single_json: Force printing the info_dict of the whole playlist
(or video) as a single JSON line.
simulate: Do not download the video files. If unset (or None),
simulate only if listsubtitles, listformats or list_thumbnails is used
format: Video format code. see "FORMAT SELECTION" for more details.
+ You can also pass a function. The function takes 'ctx' as
+ argument and returns the formats to download.
+ See "build_format_selector" for an implementation
allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
ignore_no_formats_error: Ignore "No video formats" error. Useful for
extracting metadata even if the video is not actually
available for download (experimental)
- format_sort: How to sort the video formats. see "Sorting Formats"
- for more details.
+ format_sort: A list of fields by which to sort the video formats.
+ See "Sorting Formats" for more details.
format_sort_force: Force the given format_sort. see "Sorting Formats"
for more details.
+ prefer_free_formats: Whether to prefer video formats with free containers
+ over non-free ones of same quality.
allow_multiple_video_streams: Allow multiple video streams to be merged
into a single file
allow_multiple_audio_streams: Allow multiple audio streams to be merged
writedesktoplink: Write a Linux internet shortcut file (.desktop)
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
- allsubtitles: Deprecated - Use subtitleslangs = ['all']
- Downloads all the subtitles of the video
- (requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
subtitlesformat: The format code for subtitles
subtitleslangs: List of languages of the subtitles to download (can be regex).
file that is in the archive.
break_on_reject: Stop the download process when encountering a video that
has been filtered out.
- cookiefile: File name where cookies should be read from and dumped to
- cookiesfrombrowser: A tuple containing the name of the browser and the profile
- name/path from where cookies are loaded.
- Eg: ('chrome', ) or ('vivaldi', 'default')
- nocheckcertificate:Do not verify SSL certificates
+ break_per_url: Whether break_on_reject and break_on_existing
+ should act on each input URL as opposed to for the entire queue
+ cookiefile: File name or text stream from where cookies should be read and dumped to
+ cookiesfrombrowser: A tuple containing the name of the browser, the profile
+ name/path from where cookies are loaded, and the name of the
+ keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
+ legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
+ support RFC 5746 secure renegotiation
+ nocheckcertificate: Do not verify SSL certificates
+ client_certificate: Path to client certificate file in PEM format. May include the private key
+ client_certificate_key: Path to private key file for client certificate
+ client_certificate_password: Password for client certificate private key, if encrypted.
+ If not provided and the key is encrypted, yt-dlp will ask interactively
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
+ http_headers: A dictionary of custom headers to be used for all requests
proxy: URL of the proxy server to use
geo_verification_proxy: URL of the proxy to use for IP address verification
on geo-restricted sites.
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
debug_printtraffic:Print out sent and received HTTP traffic
- include_ads: Download ads as well
default_search: Prepend this string if an input url is not valid.
'auto' for elaborate guessing
encoding: Use this encoding instead of the system-specified.
extract_flat: Do not resolve URLs, return the immediate result.
Pass in 'in_playlist' to only show this behavior for
playlist items.
+ wait_for_video: If given, wait for scheduled streams to become available.
+ The value should be a tuple containing the range
+ (min_secs, max_secs) to wait between retries
postprocessors: A list of dictionaries, each with an entry
* key: The name of the postprocessor. See
yt_dlp/postprocessor/__init__.py for a list.
- * when: When to run the postprocessor. Can be one of
- pre_process|before_dl|post_process|after_move.
+ * when: When to run the postprocessor. Allowed values are
+ the entries of utils.POSTPROCESS_WHEN
Assumed to be 'post_process' if not given
- post_hooks: Deprecated - Register a custom postprocessor instead
- A list of functions that get called as the final step
- for each video file, after all postprocessors have been
- called. The filename will be passed as the only argument.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
* status: One of "downloading", "error", or "finished".
(with status "started" and "finished") if the processing is successful.
merge_output_format: Extension to use when merging formats.
final_ext: Expected final extension; used to detect when the file was
- already downloaded and converted. "merge_output_format" is
- replaced by this extension when given
+ already downloaded and converted
fixup: Automatically correct known faults of the file.
One of:
- "never": do nothing
- "detect_or_warn": check whether we can do anything
about it, warn otherwise (default)
source_address: Client-side IP address to bind to.
- call_home: Boolean, true iff we are allowed to contact the
- yt-dlp servers for debugging. (BROKEN)
sleep_interval_requests: Number of seconds to sleep between requests
during extraction
sleep_interval: Number of seconds to sleep before each download when
sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
listformats: Print an overview of available video formats and exit.
list_thumbnails: Print a table of all thumbnails and exit.
- match_filter: A function that gets called with the info_dict of
- every video.
- If it returns a message, the video is ignored.
- If it returns None, the video is downloaded.
+ match_filter: A function that gets called for every video with the signature
+ (info_dict, *, incomplete: bool) -> Optional[str]
+ For backward compatibility with youtube-dl, the signature
+ (info_dict) -> Optional[str] is also allowed.
+ - If it returns a message, the video is ignored.
+ - If it returns None, the video is downloaded.
+ - If it returns utils.NO_DEFAULT, the user is interactively
+ asked whether to download the video.
match_filter_func in utils.py is one example for this.
no_color: Do not emit color codes in output.
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
geo_bypass_ip_block:
IP range in CIDR notation that will be used similarly to
geo_bypass_country
-
- The following options determine which downloader is picked:
external_downloader: A dictionary of protocol keys and the executable of the
external downloader to use for it. The allowed protocols
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
Set the value to 'native' to use the native downloader
- hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
- or {'m3u8': 'ffmpeg'} instead.
- Use the native HLS downloader instead of ffmpeg/avconv
- if True, otherwise use ffmpeg/avconv if False, otherwise
- use downloader suggested by extractor if None.
compat_opts: Compatibility options. See "Differences in default behavior".
The following options do not work when used through the API:
filename, abort-on-error, multistreams, no-live-chat, format-sort,
- no-clean-infojson, no-playlist-metafiles, no-keep-subs.
+ no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
Refer to __init__.py for their implementation
progress_template: Dictionary of templates for progress outputs.
Allowed keys are 'download', 'postprocess',
'download-title' (console title) and 'postprocess-title'.
The template is mapped on a dictionary with keys 'progress' and 'info'
+ retry_sleep_functions: Dictionary of functions that take the number of attempts
+ as argument and return the time to sleep in seconds.
+ Allowed keys are 'http', 'fragment', 'file_access'
+ download_ranges: A function that gets called for every video with the signature
+ (info_dict, *, ydl) -> Iterable[Section].
+ Only the returned sections will be downloaded. Each Section contains:
+ * start_time: Start time of the section in seconds
+ * end_time: End time of the section in seconds
+ * title: Section title (Optional)
+ * index: Section number (Optional)
The following parameters are not used by YoutubeDL itself, they are used by
the downloader (see yt_dlp/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
- max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
- noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
- external_downloader_args.
+ max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
+ continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
+ external_downloader_args, concurrent_fragment_downloads.
The following options are used by the post processors:
- prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
- otherwise prefer ffmpeg. (avconv support is deprecated)
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
to the binary or its containing directory.
postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
extractor_args: A dictionary of arguments to be passed to the extractors.
See "EXTRACTOR ARGUMENTS" for details.
Eg: {'youtube': {'skip': ['dash', 'hls']}}
- youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
+ mark_watched: Mark videos watched (even with --simulate). Only for YouTube
+
+ The following options are deprecated and may be removed in the future:
+
+ forceurl: - Use forceprint
+ Force printing final URL.
+ forcetitle: - Use forceprint
+ Force printing title.
+ forceid: - Use forceprint
+ Force printing ID.
+ forcethumbnail: - Use forceprint
+ Force printing thumbnail URL.
+ forcedescription: - Use forceprint
+ Force printing description.
+ forcefilename: - Use forceprint
+ Force printing final filename.
+ forceduration: - Use forceprint
+ Force printing duration.
+ allsubtitles: - Use subtitleslangs = ['all']
+ Downloads all the subtitles of the video
+ (requires writesubtitles or writeautomaticsub)
+ include_ads: - Doesn't work
+ Download ads as well
+ call_home: - Not implemented
+ Boolean, true iff we are allowed to contact the
+ yt-dlp servers for debugging.
+ post_hooks: - Register a custom postprocessor
+ A list of functions that get called as the final step
+ for each video file, after all postprocessors have been
+ called. The filename will be passed as the only argument.
+ hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
+ Use the native HLS downloader instead of ffmpeg/avconv
+ if True, otherwise use ffmpeg/avconv if False, otherwise
+ use downloader suggested by extractor if None.
+ prefer_ffmpeg: - avconv support is deprecated
+ If False, use avconv instead of ffmpeg if both are available,
+ otherwise prefer ffmpeg.
+ youtube_include_dash_manifest: - Use extractor_args
If True (default), DASH manifests and related
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
care about DASH. (only for youtube)
- youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
+ youtube_include_hls_manifest: - Use extractor_args
If True (default), HLS manifests and related
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
care about HLS. (only for youtube)
"""
- _NUMERIC_FIELDS = set((
+ _NUMERIC_FIELDS = {
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
'timestamp', 'release_timestamp',
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
'start_time', 'end_time',
'chapter_number', 'season_number', 'episode_number',
'track_number', 'disc_number', 'release_year',
- ))
+ }
+ _format_fields = {
+ # NB: Keep in sync with the docstring of extractor/common.py
+ 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
+ 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
+ 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
+ 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
+ 'preference', 'language', 'language_preference', 'quality', 'source_preference',
+ 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
+ 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
+ }
_format_selection_exts = {
'audio': {'m4a', 'mp3', 'ogg', 'aac'},
'video': {'mp4', 'flv', 'webm', '3gp'},
'storyboards': {'mhtml'},
}
- params = None
- _ies = {}
- _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
- _printed_messages = set()
- _first_webpage_request = True
- _download_retcode = None
- _num_downloads = None
- _playlist_level = 0
- _playlist_urls = set()
- _screen_file = None
-
def __init__(self, params=None, auto_init=True):
"""Create a YoutubeDL object with the given options.
@param auto_init Whether to load the default extractors and print header (if verbose).
"""
if params is None:
params = {}
+ self.params = params
self._ies = {}
self._ies_instances = {}
- self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
+ self._pps = {k: [] for k in POSTPROCESS_WHEN}
self._printed_messages = set()
self._first_webpage_request = True
self._post_hooks = []
self._postprocessor_hooks = []
self._download_retcode = 0
self._num_downloads = 0
- self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
- self._err_file = sys.stderr
- self.params = params
+ self._num_videos = 0
+ self._playlist_level = 0
+ self._playlist_urls = set()
self.cache = Cache(self)
windows_enable_vt_mode()
- # FIXME: This will break if we ever print color to stdout
- self._allow_colors = {
- 'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
- 'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
- }
+ stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
+ self._out_files = Namespace(
+ out=stdout,
+ error=sys.stderr,
+ screen=sys.stderr if self.params.get('quiet') else stdout,
+ console=None if compat_os_name == 'nt' else next(
+ filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
+ )
+ self._allow_colors = Namespace(**{
+ type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
+ for type_, stream in self._out_files.items_ if type_ != 'console'
+ })
if sys.version_info < (3, 6):
self.report_warning(
def check_deprecated(param, option, suggestion):
if self.params.get(param) is not None:
- self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
+ self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
return True
return False
for msg in self.params.get('_warnings', []):
self.report_warning(msg)
+ for msg in self.params.get('_deprecation_warnings', []):
+ self.deprecation_warning(msg)
- if 'list-formats' in self.params.get('compat_opts', []):
+ self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
+ if not compat_has_legacy:
+ self.params['compat_opts'].add('no-compat-legacy')
+ if 'list-formats' in self.params['compat_opts']:
self.params['listformats_table'] = False
if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
else:
self.params['nooverwrites'] = not self.params['overwrites']
- if params.get('bidi_workaround', False):
+ self.params.setdefault('forceprint', {})
+ self.params.setdefault('print_to_file', {})
+
+ # Compatibility with older syntax
+ if not isinstance(params['forceprint'], dict):
+ self.params['forceprint'] = {'video': params['forceprint']}
+
+ if self.params.get('bidi_workaround', False):
try:
import pty
master, slave = pty.openpty()
width = compat_get_terminal_size().columns
- if width is None:
- width_args = []
- else:
- width_args = ['-w', str(width)]
- sp_kwargs = dict(
- stdin=subprocess.PIPE,
- stdout=slave,
- stderr=self._err_file)
+ width_args = [] if width is None else ['-w', str(width)]
+ sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
try:
self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
except OSError:
else:
raise
+ if auto_init:
+ if auto_init != 'no_verbose_header':
+ self.print_debug_header()
+ self.add_default_info_extractors()
+
if (sys.platform != 'win32'
and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
- and not params.get('restrictfilenames', False)):
+ and not self.params.get('restrictfilenames', False)):
# Unicode filesystem API will throw errors (#1474, #13027)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
# Creating format selector here allows us to catch syntax errors before the extraction
self.format_selector = (
- None if self.params.get('format') is None
+ self.params.get('format') if self.params.get('format') in (None, '-')
+ else self.params['format'] if callable(self.params['format'])
else self.build_format_selector(self.params['format']))
- self._setup_opener()
+ # Set http_headers defaults according to std_headers
+ self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
- if auto_init:
- if auto_init != 'no_verbose_header':
- self.print_debug_header()
- self.add_default_info_extractors()
+ hooks = {
+ 'post_hooks': self.add_post_hook,
+ 'progress_hooks': self.add_progress_hook,
+ 'postprocessor_hooks': self.add_postprocessor_hook,
+ }
+ for opt, fn in hooks.items():
+ for ph in self.params.get(opt, []):
+ fn(ph)
for pp_def_raw in self.params.get('postprocessors', []):
pp_def = dict(pp_def_raw)
when = pp_def.pop('when', 'post_process')
- pp_class = get_postprocessor(pp_def.pop('key'))
- pp = pp_class(self, **compat_kwargs(pp_def))
- self.add_post_processor(pp, when=when)
-
- for ph in self.params.get('post_hooks', []):
- self.add_post_hook(ph)
-
- for ph in self.params.get('progress_hooks', []):
- self.add_progress_hook(ph)
+ self.add_post_processor(
+ get_postprocessor(pp_def.pop('key'))(self, **pp_def),
+ when=when)
+ self._setup_opener()
register_socks_protocols()
def preload_download_archive(fn):
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file:
self.archive.add(line.strip())
- except IOError as ioe:
+ except OSError as ioe:
if ioe.errno != errno.ENOENT:
raise
return False
def add_postprocessor_hook(self, ph):
"""Add the postprocessing progress hook"""
self._postprocessor_hooks.append(ph)
+ for pps in self._pps.values():
+ for pp in pps:
+ pp.add_progress_hook(ph)
def _bidi_workaround(self, message):
if not hasattr(self, '_output_channel'):
assert hasattr(self, '_output_process')
assert isinstance(message, compat_str)
line_count = message.count('\n') + 1
- self._output_process.stdin.write((message + '\n').encode('utf-8'))
+ self._output_process.stdin.write((message + '\n').encode())
self._output_process.stdin.flush()
- res = ''.join(self._output_channel.readline().decode('utf-8')
+ res = ''.join(self._output_channel.readline().decode()
for _ in range(line_count))
return res[:-len('\n')]
self._printed_messages.add(message)
write_string(message, out=out, encoding=self.params.get('encoding'))
- def to_stdout(self, message, skip_eol=False, quiet=False):
+ def to_stdout(self, message, skip_eol=False, quiet=None):
"""Print message to stdout"""
+ if quiet is not None:
+ self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
+ if skip_eol is not False:
+ self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
+ self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
+
+ def to_screen(self, message, skip_eol=False, quiet=None):
+ """Print message to screen if not in quiet mode"""
if self.params.get('logger'):
self.params['logger'].debug(message)
- elif not quiet or self.params.get('verbose'):
- self._write_string(
- '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
- self._err_file if quiet else self._screen_file)
+ return
+ if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
+ return
+ self._write_string(
+ '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
+ self._out_files.screen)
def to_stderr(self, message, only_once=False):
"""Print message to stderr"""
if self.params.get('logger'):
self.params['logger'].error(message)
else:
- self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
+ self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
+
+ def _send_console_code(self, code):
+ if compat_os_name == 'nt' or not self._out_files.console:
+ return
+ self._write_string(code, self._out_files.console)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
return
+ message = remove_terminal_sequences(message)
if compat_os_name == 'nt':
if ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
- elif 'TERM' in os.environ:
- self._write_string('\033]0;%s\007' % message, self._screen_file)
+ else:
+ self._send_console_code(f'\033]0;{message}\007')
def save_console_title(self):
- if not self.params.get('consoletitle', False):
- return
- if self.params.get('simulate'):
+ if not self.params.get('consoletitle') or self.params.get('simulate'):
return
- if compat_os_name != 'nt' and 'TERM' in os.environ:
- # Save the title on stack
- self._write_string('\033[22;0t', self._screen_file)
+ self._send_console_code('\033[22;0t') # Save the title on stack
def restore_console_title(self):
- if not self.params.get('consoletitle', False):
- return
- if self.params.get('simulate'):
+ if not self.params.get('consoletitle') or self.params.get('simulate'):
return
- if compat_os_name != 'nt' and 'TERM' in os.environ:
- # Restore the title from stack
- self._write_string('\033[23;0t', self._screen_file)
+ self._send_console_code('\033[23;0t') # Restore the title from stack
def __enter__(self):
self.save_console_title()
if self.params.get('cookiefile') is not None:
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
- def trouble(self, message=None, tb=None):
+ def trouble(self, message=None, tb=None, is_error=True):
"""Determine action to take when a download problem appears.
Depending on if the downloader has been configured to ignore
download errors or not, this method may throw an exception or
not when errors are found, after printing the message.
- tb, if given, is additional traceback information.
+ @param tb If given, is additional traceback information
+ @param is_error Whether to raise error according to ignoreerrors
"""
if message is not None:
self.to_stderr(message)
tb = ''.join(tb_data)
if tb:
self.to_stderr(tb)
+ if not is_error:
+ return
if not self.params.get('ignoreerrors'):
if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
exc_info = sys.exc_info()[1].exc_info
raise DownloadError(message, exc_info)
self._download_retcode = 1
- def to_screen(self, message, skip_eol=False):
- """Print message to stdout if not in quiet mode"""
- self.to_stdout(
- message, skip_eol, quiet=self.params.get('quiet', False))
-
- class Styles(Enum):
- HEADERS = 'yellow'
- EMPHASIS = 'blue'
- ID = 'green'
- DELIM = 'blue'
- ERROR = 'red'
- WARNING = 'yellow'
-
- def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
- assert out in ('screen', 'err')
+ Styles = Namespace(
+ HEADERS='yellow',
+ EMPHASIS='light blue',
+ FILENAME='green',
+ ID='green',
+ DELIM='blue',
+ ERROR='red',
+ WARNING='yellow',
+ SUPPRESS='light black',
+ )
+
+ def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
+ text = str(text)
if test_encoding:
original_text = text
- handle = self._screen_file if out == 'screen' else self._err_file
- encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
+ # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
+ encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
text = text.encode(encoding, 'ignore').decode(encoding)
if fallback is not None and text != original_text:
text = fallback
- if isinstance(f, self.Styles):
- f = f._value_
- return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback
+ return format_text(text, f) if allow_colors else text if fallback is None else fallback
+
+ def _format_out(self, *args, **kwargs):
+ return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
def _format_screen(self, *args, **kwargs):
- return self.__format_text('screen', *args, **kwargs)
+ return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
def _format_err(self, *args, **kwargs):
- return self.__format_text('err', *args, **kwargs)
+ return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
def report_warning(self, message, only_once=False):
'''
return
self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
- def report_error(self, message, tb=None):
+ def deprecation_warning(self, message):
+ if self.params.get('logger') is not None:
+ self.params['logger'].warning(f'DeprecationWarning: {message}')
+ else:
+ self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
+
+ def report_error(self, message, *args, **kwargs):
'''
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
'''
- self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
+ self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
def write_debug(self, message, only_once=False):
'''Log debug message or Print message to stderr'''
if not self.params.get('verbose', False):
return
- message = '[debug] %s' % message
+ message = f'[debug] {message}'
if self.params.get('logger'):
self.params['logger'].debug(message)
else:
except UnicodeEncodeError:
self.to_screen('Deleting existing file')
- def raise_no_formats(self, info, forced=False):
- has_drm = info.get('__has_drm')
- msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
- expected = self.params.get('ignore_no_formats_error')
- if forced or not expected:
+ def raise_no_formats(self, info, forced=False, *, msg=None):
+ has_drm = info.get('_has_drm')
+ ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
+ msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
+ if forced or not ignored:
raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
- expected=has_drm or expected)
+ expected=has_drm or ignored or expected)
else:
self.report_warning(msg)
outtmpl_dict.update({
k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
if outtmpl_dict.get(k) is None})
- for key, val in outtmpl_dict.items():
+ for _, val in outtmpl_dict.items():
if isinstance(val, bytes):
- self.report_warning(
- 'Parameter outtmpl is bytes, but should be a unicode string. '
- 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
+ self.report_warning('Parameter outtmpl is bytes, but should be a unicode string')
return outtmpl_dict
def get_output_path(self, dir_type='', filename=None):
expand_path(paths.get('home', '').strip()),
expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
filename or '')
-
- # Temporary fix for #4787
- # 'Treat' all problem characters by passing filename through preferredencoding
- # to workaround encoding issues with subprocess on python2 @ Windows
- if sys.version_info < (3, 0) and sys.platform == 'win32':
- path = encodeFilename(path, True).decode(preferredencoding())
return sanitize_path(path, force=self.params.get('windowsfilenames'))
@staticmethod
# '%%' intact for template dict substitution step. Working around
# with boundary-alike separator hack.
sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
- outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+ outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
# outtmpl should be expand_path'ed before template dict substitution
# because meta fields may contain env variables we don't want to
def validate_outtmpl(cls, outtmpl):
''' @return None or Exception object '''
outtmpl = re.sub(
- STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
+ STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
lambda mobj: f'{mobj.group(0)[:-1]}s',
cls._outtmpl_expandpath(outtmpl))
try:
@staticmethod
def _copy_infodict(info_dict):
info_dict = dict(info_dict)
- for key in ('__original_infodict', '__postprocessors'):
- info_dict.pop(key, None)
+ info_dict.pop('__postprocessors', None)
+ info_dict.pop('__pending_error', None)
return info_dict
- def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
- """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
+ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
+ """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
+ @param sanitize Whether to sanitize the output as a filename.
+ For backward compatibility, a function can also be passed
+ """
+
info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
info_dict = self._copy_infodict(info_dict)
formatSeconds(info_dict['duration'], '-' if sanitize else ':')
if info_dict.get('duration', None) is not None
else None)
- info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+ info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
+ info_dict['video_autonumber'] = self._num_videos
if info_dict.get('resolution') is None:
info_dict['resolution'] = self.format_resolution(info_dict, default=None)
# For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
# of %(field)s to %(field)0Nd for backward compatibility
field_size_compat_map = {
- 'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
+ 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
'autonumber': self.params.get('autonumber_size') or 5,
}
TMPL_DICT = {}
- EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
+ EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
MATH_FUNCTIONS = {
'+': float.__add__,
'-': float.__sub__,
# Field is of the form key1.key2...
# where keys (except first) can be string, int or slice
FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
- MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
+ MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
- INTERNAL_FORMAT_RE = re.compile(r'''(?x)
+ INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
(?P<negate>-)?
- (?P<fields>{field})
- (?P<maths>(?:{math_op}{math_field})*)
+ (?P<fields>{FIELD_RE})
+ (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
(?:>(?P<strf_format>.+?))?
- (?P<alternate>(?<!\\),[^|)]+)?
- (?:\|(?P<default>.*?))?
- $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
+ (?P<remaining>
+ (?P<alternate>(?<!\\),[^|&)]+)?
+ (?:&(?P<replacement>.*?))?
+ (?:\|(?P<default>.*?))?
+ )$''')
def _traverse_infodict(k):
k = k.split('.')
na = self.params.get('outtmpl_na_placeholder', 'NA')
+ def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
+ return sanitize_filename(str(value), restricted=restricted, is_id=(
+ bool(re.search(r'(^|[_.])id(\.|$)', key))
+ if 'filename-sanitization' in self.params['compat_opts']
+ else NO_DEFAULT))
+
+ sanitizer = sanitize if callable(sanitize) else filename_sanitizer
+ sanitize = bool(sanitize)
+
def _dumpjson_default(obj):
if isinstance(obj, (set, LazyList)):
return list(obj)
- raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
+ return repr(obj)
def create_key(outer_mobj):
if not outer_mobj.group('has_key'):
return outer_mobj.group(0)
key = outer_mobj.group('key')
mobj = re.match(INTERNAL_FORMAT_RE, key)
- initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
- value, default = None, na
+ initial_field = mobj.group('fields') if mobj else ''
+ value, replacement, default = None, None, na
while mobj:
mobj = mobj.groupdict()
default = mobj['default'] if mobj['default'] is not None else default
value = get_value(mobj)
+ replacement = mobj['replacement']
if value is None and mobj['alternate']:
- mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
+ mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
else:
break
fmt = outer_mobj.group('format')
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
- fmt = '0{:d}d'.format(field_size_compat_map[key])
+ fmt = f'0{field_size_compat_map[key]:d}d'
- value = default if value is None else value
+ value = default if value is None else value if replacement is None else replacement
+ flags = outer_mobj.group('conversion') or ''
str_fmt = f'{fmt[:-1]}s'
if fmt[-1] == 'l': # list
- delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
- value, fmt = delim.join(variadic(value)), str_fmt
+ delim = '\n' if '#' in flags else ', '
+ value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
elif fmt[-1] == 'j': # json
- value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
+ value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
elif fmt[-1] == 'q': # quoted
- value, fmt = compat_shlex_quote(str(value)), str_fmt
+ value = map(str, variadic(value) if '#' in flags else [value])
+ value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
elif fmt[-1] == 'B': # bytes
- value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
+ value = f'%{str_fmt}'.encode() % str(value).encode()
value, fmt = value.decode('utf-8', 'ignore'), 's'
elif fmt[-1] == 'U': # unicode normalized
- opts = outer_mobj.group('conversion') or ''
value, fmt = unicodedata.normalize(
# "+" = compatibility equivalence, "#" = NFD
- 'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
+ 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
value), str_fmt
+ elif fmt[-1] == 'D': # decimal suffix
+ num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
+ value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
+ factor=1024 if '#' in flags else 1000)
+ elif fmt[-1] == 'S': # filename sanitization
+ value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
elif fmt[-1] == 'c':
if value:
value = str(value)[0]
# So we convert it to repr first
value, fmt = repr(value), str_fmt
if fmt[-1] in 'csr':
- value = sanitize(initial_field, value)
+ value = sanitizer(initial_field, value)
key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
TMPL_DICT[key] = value
outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
return self.escape_outtmpl(outtmpl) % info_dict
- def _prepare_filename(self, info_dict, tmpl_type='default'):
+ def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
+ assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
+ if outtmpl is None:
+ outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
try:
- sanitize = lambda k, v: sanitize_filename(
- compat_str(v),
- restricted=self.params.get('restrictfilenames'),
- is_id=(k == 'id' or k.endswith('_id')))
- outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
- filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
+ outtmpl = self._outtmpl_expandpath(outtmpl)
+ filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
+ if not filename:
+ return None
- force_ext = OUTTMPL_TYPES.get(tmpl_type)
- if filename and force_ext is not None:
- filename = replace_extension(filename, force_ext, info_dict.get('ext'))
+ if tmpl_type in ('', 'temp'):
+ final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
+ if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
+ filename = replace_extension(filename, ext, final_ext)
+ elif tmpl_type:
+ force_ext = OUTTMPL_TYPES[tmpl_type]
+ if force_ext:
+ filename = replace_extension(filename, force_ext, info_dict.get('ext'))
# https://github.com/blackjack4494/youtube-dlc/issues/85
trim_file_name = self.params.get('trim_file_name', False)
if trim_file_name:
- fn_groups = filename.rsplit('.')
- ext = fn_groups[-1]
- sub_ext = ''
- if len(fn_groups) > 2:
- sub_ext = fn_groups[-2]
- filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
+ no_ext, *ext = filename.rsplit('.', 2)
+ filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
return filename
except ValueError as err:
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
- def prepare_filename(self, info_dict, dir_type='', warn=False):
- """Generate the output filename."""
-
- filename = self._prepare_filename(info_dict, dir_type or 'default')
+ def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
+ """Generate the output filename"""
+ if outtmpl:
+ assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
+ dir_type = None
+ filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
if not filename and dir_type not in ('', 'temp'):
return ''
if date is not None:
dateRange = self.params.get('daterange', DateRange())
if date not in dateRange:
- return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+ return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
view_count = info_dict.get('view_count')
if view_count is not None:
min_views = self.params.get('min_views')
except TypeError:
# For backward compatibility
ret = None if incomplete else match_filter(info_dict)
- if ret is not None:
+ if ret is NO_DEFAULT:
+ while True:
+ filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
+ reply = input(self._format_screen(
+ f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
+ if reply in {'y', ''}:
+ return None
+ elif reply == 'n':
+ return f'Skipping {video_title}'
+ elif ret is not None:
return ret
return None
temp_id = ie.get_temp_id(url)
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
- self.to_screen("[%s] %s: has already been recorded in archive" % (
- ie_key, temp_id))
+ self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
+ if self.params.get('break_on_existing', False):
+ raise ExistingVideoReached()
break
return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
- def __handle_extraction_exceptions(func):
+ def _handle_extraction_exceptions(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
- try:
- return func(self, *args, **kwargs)
- except GeoRestrictedError as e:
- msg = e.msg
- if e.countries:
- msg += '\nThis video is available in %s.' % ', '.join(
- map(ISO3166Utils.short2full, e.countries))
- msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
- self.report_error(msg)
- except ExtractorError as e: # An error we somewhat expected
- self.report_error(compat_str(e), e.format_traceback())
- except ThrottledDownload:
- self.to_stderr('\r')
- self.report_warning('The download speed is below throttle limit. Re-extracting data')
- return wrapper(self, *args, **kwargs)
- except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
- raise
- except Exception as e:
- if self.params.get('ignoreerrors'):
- self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
- else:
+ while True:
+ try:
+ return func(self, *args, **kwargs)
+ except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
raise
+ except ReExtractInfo as e:
+ if e.expected:
+ self.to_screen(f'{e}; Re-extracting data')
+ else:
+ self.to_stderr('\r')
+ self.report_warning(f'{e}; Re-extracting data')
+ continue
+ except GeoRestrictedError as e:
+ msg = e.msg
+ if e.countries:
+ msg += '\nThis video is available in %s.' % ', '.join(
+ map(ISO3166Utils.short2full, e.countries))
+ msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
+ self.report_error(msg)
+ except ExtractorError as e: # An error we somewhat expected
+ self.report_error(str(e), e.format_traceback())
+ except Exception as e:
+ if self.params.get('ignoreerrors'):
+ self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
+ else:
+ raise
+ break
return wrapper
- @__handle_extraction_exceptions
+ def _wait_for_video(self, ie_result):
+ if (not self.params.get('wait_for_video')
+ or ie_result.get('_type', 'video') != 'video'
+ or ie_result.get('formats') or ie_result.get('url')):
+ return
+
+ format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
+ last_msg = ''
+
+ def progress(msg):
+ nonlocal last_msg
+ self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
+ last_msg = msg
+
+ min_wait, max_wait = self.params.get('wait_for_video')
+ diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
+ if diff is None and ie_result.get('live_status') == 'is_upcoming':
+ diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
+ self.report_warning('Release time of video is not known')
+ elif (diff or 0) <= 0:
+ self.report_warning('Video should already be available according to extracted info')
+ diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
+ self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
+
+ wait_till = time.time() + diff
+ try:
+ while True:
+ diff = wait_till - time.time()
+ if diff <= 0:
+ progress('')
+ raise ReExtractInfo('[wait] Wait period ended', expected=True)
+ progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
+ time.sleep(1)
+ except KeyboardInterrupt:
+ progress('')
+ raise ReExtractInfo('[wait] Interrupted by user', expected=True)
+ except BaseException as e:
+ if not isinstance(e, ReExtractInfo):
+ self.to_screen('')
+ raise
+
+ @_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
ie_result.setdefault('original_url', extra_info['original_url'])
self.add_default_extra_info(ie_result, ie, url)
if process:
+ self._wait_for_video(ie_result)
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
self.add_extra_info(ie_result, {
'webpage_url': url,
'original_url': url,
- 'webpage_url_basename': url_basename(url),
+ })
+ webpage_url = ie_result.get('webpage_url')
+ if webpage_url:
+ self.add_extra_info(ie_result, {
+ 'webpage_url_basename': url_basename(webpage_url),
+ 'webpage_url_domain': get_domain(webpage_url),
})
if ie is not None:
self.add_extra_info(ie_result, {
info_copy['id'] = ie.get_temp_id(ie_result['url'])
self.add_default_extra_info(info_copy, ie, ie_result['url'])
self.add_extra_info(info_copy, extra_info)
+ info_copy, _ = self.pre_process(info_copy)
self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
+ self._raise_pending_errors(info_copy)
if self.params.get('force_write_download_archive', False):
self.record_download_archive(info_copy)
return ie_result
if result_type == 'video':
self.add_extra_info(ie_result, extra_info)
ie_result = self.process_video_result(ie_result, download=download)
+ self._raise_pending_errors(ie_result)
additional_urls = (ie_result or {}).get('additional_urls')
if additional_urls:
# TODO: Improve MetadataParserPP to allow setting a list
self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
ie_result['additional_entries'] = [
self.extract_info(
- url, download, extra_info,
+ url, download, extra_info=extra_info,
force_generic_extractor=self.params.get('force_generic_extractor'))
for url in additional_urls
]
if not info:
return info
- force_properties = dict(
- (k, v) for k, v in ie_result.items() if v is not None)
- for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
- if f in force_properties:
- del force_properties[f]
new_result = info.copy()
- new_result.update(force_properties)
+ new_result.update(filter_dict(ie_result, lambda k, v: (
+ v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
# Extracted info may not be a video result (i.e.
# info.get('_type', 'video') != video) but rather an url or
self._playlist_level += 1
self._playlist_urls.add(webpage_url)
+ self._fill_common_fields(ie_result, False)
self._sanitize_thumbnails(ie_result)
try:
return self.__process_playlist(ie_result, download)
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'webpage_url_domain': get_domain(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
})
return r
def _ensure_dir_exists(self, path):
return make_dir(path, self.report_error)
- def __process_playlist(self, ie_result, download):
- # We process each entry in the playlist
- playlist = ie_result.get('title') or ie_result.get('id')
- self.to_screen('[download] Downloading playlist: %s' % playlist)
-
- if 'entries' not in ie_result:
- raise EntryNotInPlaylist()
- incomplete_entries = bool(ie_result.get('requested_entries'))
- if incomplete_entries:
- def fill_missing_entries(entries, indexes):
- ret = [None] * max(*indexes)
- for i, entry in zip(indexes, entries):
- ret[i - 1] = entry
- return ret
- ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
-
- playlist_results = []
-
- playliststart = self.params.get('playliststart', 1)
- playlistend = self.params.get('playlistend')
- # For backwards compatibility, interpret -1 as whole list
- if playlistend == -1:
- playlistend = None
-
- playlistitems_str = self.params.get('playlist_items')
- playlistitems = None
- if playlistitems_str is not None:
- def iter_playlistitems(format):
- for string_segment in format.split(','):
- if '-' in string_segment:
- start, end = string_segment.split('-')
- for item in range(int(start), int(end) + 1):
- yield int(item)
- else:
- yield int(string_segment)
- playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
-
- ie_entries = ie_result['entries']
- msg = (
- 'Downloading %d videos' if not isinstance(ie_entries, list)
- else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
-
- if isinstance(ie_entries, list):
- def get_entry(i):
- return ie_entries[i - 1]
- else:
- if not isinstance(ie_entries, PagedList):
- ie_entries = LazyList(ie_entries)
-
- def get_entry(i):
- return YoutubeDL.__handle_extraction_exceptions(
- lambda self, i: ie_entries[i - 1]
- )(self, i)
-
- entries = []
- items = playlistitems if playlistitems is not None else itertools.count(playliststart)
- for i in items:
- if i == 0:
- continue
- if playlistitems is None and playlistend is not None and playlistend < i:
- break
- entry = None
- try:
- entry = get_entry(i)
- if entry is None:
- raise EntryNotInPlaylist()
- except (IndexError, EntryNotInPlaylist):
- if incomplete_entries:
- raise EntryNotInPlaylist()
- elif not playlistitems:
- break
- entries.append(entry)
- try:
- if entry is not None:
- self._match_entry(entry, incomplete=True, silent=True)
- except (ExistingVideoReached, RejectedVideoReached):
- break
- ie_result['entries'] = entries
-
- # Save playlist_index before re-ordering
- entries = [
- ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
- for i, entry in enumerate(entries, 1)
- if entry is not None]
- n_entries = len(entries)
-
- if not playlistitems and (playliststart or playlistend):
- playlistitems = list(range(playliststart, playliststart + n_entries))
- ie_result['requested_entries'] = playlistitems
-
- if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
- ie_copy = {
- 'playlist': playlist,
- 'playlist_id': ie_result.get('id'),
- 'playlist_title': ie_result.get('title'),
- 'playlist_uploader': ie_result.get('uploader'),
- 'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': 0,
- 'n_entries': n_entries,
- }
- ie_copy.update(dict(ie_result))
+ @staticmethod
+ def _playlist_infodict(ie_result, **kwargs):
+ return {
+ **ie_result,
+ 'playlist': ie_result.get('title') or ie_result.get('id'),
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ 'playlist_index': 0,
+ **kwargs,
+ }
- if self._write_info_json('playlist', ie_result,
- self.prepare_filename(ie_copy, 'pl_infojson')) is None:
+ def __process_playlist(self, ie_result, download):
+ """Process each entry in the playlist"""
+ title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
+ self.to_screen(f'[download] Downloading playlist: {title}')
+
+ all_entries = PlaylistEntries(self, ie_result)
+ entries = orderedSet(all_entries.get_requested_items())
+ ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])
+ n_entries, ie_result['playlist_count'] = len(entries), all_entries.full_count
+
+ _infojson_written = False
+ write_playlist_files = self.params.get('allow_playlist_files', True)
+ if write_playlist_files and self.params.get('list_thumbnails'):
+ self.list_thumbnails(ie_result)
+ if write_playlist_files and not self.params.get('simulate'):
+ ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
+ _infojson_written = self._write_info_json(
+ 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
+ if _infojson_written is None:
return
if self._write_description('playlist', ie_result,
self.prepare_filename(ie_copy, 'pl_description')) is None:
if self.params.get('playlistrandom', False):
random.shuffle(entries)
- x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+ self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
+ f'{format_field(ie_result, "playlist_count", " of %s")}')
- self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
failures = 0
max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
- for i, entry_tuple in enumerate(entries, 1):
- playlist_index, entry = entry_tuple
+ for i, (playlist_index, entry) in enumerate(entries, 1):
+ # TODO: Add auto-generated fields
+ if self._match_entry(entry, incomplete=True) is not None:
+ continue
+
if 'playlist-index' in self.params.get('compat_opts', []):
- playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
- self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
- # This __x_forwarded_for_ip thing is a bit ugly but requires
- # minimal changes
- if x_forwarded_for:
- entry['__x_forwarded_for_ip'] = x_forwarded_for
- extra = {
+ playlist_index = ie_result['requested_entries'][i - 1]
+ self.to_screen('[download] Downloading video %s of %s' % (
+ self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
+
+ entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
+ entry_result = self.__process_iterable_entry(entry, download, {
'n_entries': n_entries,
- '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
+ '__last_playlist_index': max(ie_result['requested_entries']),
+ 'playlist_count': ie_result.get('playlist_count'),
'playlist_index': playlist_index,
'playlist_autonumber': i,
- 'playlist': playlist,
+ 'playlist': title,
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'webpage_url_domain': get_domain(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
- }
-
- if self._match_entry(entry, incomplete=True) is not None:
- continue
-
- entry_result = self.__process_iterable_entry(entry, download, extra)
+ })
if not entry_result:
failures += 1
if failures >= max_failures:
self.report_error(
- 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
+ f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
break
- # TODO: skip failed (empty) entries?
- playlist_results.append(entry_result)
- ie_result['entries'] = playlist_results
- self.to_screen('[download] Finished downloading playlist: %s' % playlist)
+ entries[i - 1] = (playlist_index, entry_result)
+
+ # Update with processed data
+ ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])
+
+ # Write the updated info to json
+ if _infojson_written is True and self._write_info_json(
+ 'updated playlist', ie_result,
+ self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
+ return
+
+ ie_result = self.run_all_pps('playlist', ie_result)
+ self.to_screen(f'[download] Finished downloading playlist: {title}')
return ie_result
- @__handle_extraction_exceptions
+ @_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
return self.process_ie_result(
entry, download=download, extra_info=extra_info)
'^=': lambda attr, value: attr.startswith(value),
'$=': lambda attr, value: attr.endswith(value),
'*=': lambda attr, value: value in attr,
+ '~=': lambda attr, value: value.search(attr) is not None
}
str_operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-zA-Z0-9._-]+)\s*
- (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
- (?P<value>[a-zA-Z0-9._-]+)\s*
+ (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
+ (?P<quote>["'])?
+ (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
+ (?(quote)(?P=quote))\s*
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
m = str_operator_rex.fullmatch(filter_spec)
if m:
- comparison_value = m.group('value')
+ if m.group('op') == '~=':
+ comparison_value = re.compile(m.group('value'))
+ else:
+ comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
str_op = STR_OPERATORS[m.group('op')]
if m.group('negation'):
op = lambda attr, value: not str_op(attr, value)
def _check_formats(self, formats):
for f in formats:
self.to_screen('[info] Testing format %s' % f['format_id'])
- temp_file = tempfile.NamedTemporaryFile(
- suffix='.tmp', delete=False,
- dir=self.get_output_path('temp') or None)
+ path = self.get_output_path('temp')
+ if not self._ensure_dir_exists(f'{path}/'):
+ continue
+ temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
temp_file.close()
try:
success, _ = self.dl(temp_file.name, f, test=True)
- except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
+ except (DownloadError, OSError, ValueError) + network_exceptions:
success = False
finally:
if os.path.exists(temp_file.name):
and download
and (
not can_merge()
- or info_dict.get('is_live', False)
+ or info_dict.get('is_live') and not self.params.get('live_from_start')
or self.outtmpl_dict['default'] == '-'))
compat = (
prefer_best
or self.params.get('allow_multiple_audio_streams', False)
- or 'format-spec' in self.params.get('compat_opts', []))
+ or 'format-spec' in self.params['compat_opts'])
return (
'best/bestvideo+bestaudio' if prefer_best
def syntax_error(note, start):
message = (
'Invalid format specification: '
- '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
+ '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
return SyntaxError(message)
PICKFIRST = 'PICKFIRST'
raise syntax_error('Expected a selector', start)
current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
else:
- raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
+ raise syntax_error(f'Operator not recognized: "{string}"', start)
elif type == tokenize.ENDMARKER:
break
if current_selector:
'format_id': '+'.join(filtered('format_id')),
'ext': output_ext,
'protocol': '+'.join(map(determine_protocol, formats_info)),
- 'language': '+'.join(orderedSet(filtered('language'))),
- 'format_note': '+'.join(orderedSet(filtered('format_note'))),
- 'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
+ 'language': '+'.join(orderedSet(filtered('language'))) or None,
+ 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
+ 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
'tbr': sum(filtered('tbr', 'vbr', 'abr')),
}
selector_1, selector_2 = map(_build_selector_function, selector.selector)
def selector_function(ctx):
- for pair in itertools.product(
- selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
+ for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
yield _merge(pair)
elif selector.type == SINGLE: # atom
# TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
if format_spec == 'all':
def selector_function(ctx):
- yield from _check_formats(ctx['formats'])
+ yield from _check_formats(ctx['formats'][::-1])
elif format_spec == 'mergeall':
def selector_function(ctx):
- formats = list(_check_formats(ctx['formats']))
+ formats = list(_check_formats(
+ f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
if not formats:
return
merged_format = formats[-1]
yield merged_format
else:
- format_fallback, format_reverse, format_idx = False, True, 1
+ format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
mobj = re.match(
r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
format_spec)
filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
elif format_spec in self._format_selection_exts['video']:
filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
+ seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
elif format_spec in self._format_selection_exts['storyboards']:
filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
else:
def selector_function(ctx):
formats = list(ctx['formats'])
matches = list(filter(filter_f, formats)) if filter_f is not None else formats
- if format_fallback and ctx['incomplete_formats'] and not matches:
- # for extractors with incomplete formats (audio only (soundcloud)
- # or video only (imgur)) best/worst will fallback to
- # best/worst {video,audio}-only format
- matches = formats
+ if not matches:
+ if format_fallback and ctx['incomplete_formats']:
+ # for extractors with incomplete formats (audio only (soundcloud)
+ # or video only (imgur)) best/worst will fallback to
+ # best/worst {video,audio}-only format
+ matches = formats
+ elif seperate_fallback and not ctx['has_merged_format']:
+ # for compatibility with youtube-dl when there is no pre-merged format
+ matches = list(filter(seperate_fallback, formats))
matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
try:
yield matches[format_idx - 1]
- except IndexError:
+ except LazyList.IndexError:
return
filters = [self._build_format_filter(f) for f in selector.filters]
def final_selector(ctx):
- ctx_copy = copy.deepcopy(ctx)
+ ctx_copy = dict(ctx)
for _filter in filters:
ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
return selector_function(ctx_copy)
return final_selector
- stream = io.BytesIO(format_spec.encode('utf-8'))
+ stream = io.BytesIO(format_spec.encode())
try:
- tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
+ tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
except tokenize.TokenError:
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
- class TokenIterator(object):
+ class TokenIterator:
def __init__(self, tokens):
self.tokens = tokens
self.counter = 0
return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use when requesting ``info_dict['url']``.

    The ``http_headers`` param is combined (through ``merge_headers``) with
    the ``http_headers`` entry of *info_dict*, when that entry is set.
    A ``Cookie`` header is added if ``_calc_cookies`` yields a value for the URL.
    """
    headers = merge_headers(
        self.params['http_headers'],
        info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict['url'])
    if cookie_header:
        headers['Cookie'] = cookie_header
    return headers
def _calc_cookies(self, url):
    """Return the ``Cookie`` header value that ``self.cookiejar`` sets for *url*.

    The result is whatever ``get_header('Cookie')`` reports on the request
    after the cookie jar has processed it.
    """
    request = sanitized_Request(url)
    # Let the cookie jar attach its cookies to the throw-away request
    self.cookiejar.add_cookie_header(request)
    cookie_header = request.get_header('Cookie')
    return cookie_header
t['url'] = sanitize_url(t['url'])
if self.params.get('check_formats') is True:
- info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse()
+ info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
else:
info_dict['thumbnails'] = thumbnails
+ def _fill_common_fields(self, info_dict, is_video=True):
+ # TODO: move sanitization here
+ if is_video:
+ # playlists are allowed to lack "title"
+ title = info_dict.get('title', NO_DEFAULT)
+ if title is NO_DEFAULT:
+ raise ExtractorError('Missing "title" field in extractor result',
+ video_id=info_dict['id'], ie=info_dict['extractor'])
+ info_dict['fulltitle'] = title
+ if not title:
+ if title == '':
+ self.write_debug('Extractor gave empty title. Creating a generic title')
+ else:
+ self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
+ info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
+
+ if info_dict.get('duration') is not None:
+ info_dict['duration_string'] = formatSeconds(info_dict['duration'])
+
+ for ts_key, date_key in (
+ ('timestamp', 'upload_date'),
+ ('release_timestamp', 'release_date'),
+ ('modified_timestamp', 'modified_date'),
+ ):
+ if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+ # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+ # see http://bugs.python.org/issue1646728)
+ with contextlib.suppress(ValueError, OverflowError, OSError):
+ upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+ info_dict[date_key] = upload_date.strftime('%Y%m%d')
+
+ live_keys = ('is_live', 'was_live')
+ live_status = info_dict.get('live_status')
+ if live_status is None:
+ for key in live_keys:
+ if info_dict.get(key) is False:
+ continue
+ if info_dict.get(key):
+ live_status = key
+ break
+ if all(info_dict.get(key) is False for key in live_keys):
+ live_status = 'not_live'
+ if live_status:
+ info_dict['live_status'] = live_status
+ for key in live_keys:
+ if info_dict.get(key) is None:
+ info_dict[key] = (live_status == key)
+
+ # Auto generate title fields corresponding to the *_number fields when missing
+ # in order to always have clean titles. This is very common for TV series.
+ for field in ('chapter', 'season', 'episode'):
+ if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+ info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
+ def _raise_pending_errors(self, info):
+ err = info.pop('__pending_error', None)
+ if err:
+ self.report_error(err, tb=False)
+
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
+ self._num_videos += 1
if 'id' not in info_dict:
- raise ExtractorError('Missing "id" field in extractor result')
- if 'title' not in info_dict:
- raise ExtractorError('Missing "title" field in extractor result',
- video_id=info_dict['id'], ie=info_dict['extractor'])
+ raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
+ elif not info_dict.get('id'):
+ raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
def report_force_conversion(field, field_not, conversion):
self.report_warning(
def sanitize_numeric_fields(info):
for numeric_field in self._NUMERIC_FIELDS:
field = info.get(numeric_field)
- if field is None or isinstance(field, compat_numeric_types):
+ if field is None or isinstance(field, (int, float)):
continue
report_force_conversion(numeric_field, 'numeric', 'int')
info[numeric_field] = int_or_none(field)
sanitize_string_field(info_dict, 'id')
sanitize_numeric_fields(info_dict)
+ if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
+ self.report_warning('"duration" field is negative, there is an error in extractor')
if 'playlist' not in info_dict:
# It isn't part of a playlist
if info_dict.get('display_id') is None and 'id' in info_dict:
info_dict['display_id'] = info_dict['id']
- if info_dict.get('duration') is not None:
- info_dict['duration_string'] = formatSeconds(info_dict['duration'])
-
- for ts_key, date_key in (
- ('timestamp', 'upload_date'),
- ('release_timestamp', 'release_date'),
- ):
- if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
- # Working around out-of-range timestamp values (e.g. negative ones on Windows,
- # see http://bugs.python.org/issue1646728)
- try:
- upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
- info_dict[date_key] = upload_date.strftime('%Y%m%d')
- except (ValueError, OverflowError, OSError):
- pass
-
- live_keys = ('is_live', 'was_live')
- live_status = info_dict.get('live_status')
- if live_status is None:
- for key in live_keys:
- if info_dict.get(key) is False:
- continue
- if info_dict.get(key):
- live_status = key
- break
- if all(info_dict.get(key) is False for key in live_keys):
- live_status = 'not_live'
- if live_status:
- info_dict['live_status'] = live_status
- for key in live_keys:
- if info_dict.get(key) is None:
- info_dict[key] = (live_status == key)
-
- # Auto generate title fields corresponding to the *_number fields when missing
- # in order to always have clean titles. This is very common for TV series.
- for field in ('chapter', 'season', 'episode'):
- if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
- info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+ self._fill_common_fields(info_dict)
for cc_kind in ('subtitles', 'automatic_captions'):
cc = info_dict.get(cc_kind)
else:
formats = info_dict['formats']
- info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
+ # or None ensures --clean-infojson removes it
+ info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
if not self.params.get('allow_unplayable_formats'):
formats = [f for f in formats if not f.get('has_drm')]
+ if info_dict['_has_drm'] and all(
+ f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
+ self.report_warning(
+ 'This video is DRM protected and only images are available for download. '
+ 'Use --list-formats to see them')
+
+ get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
+ if not get_from_start:
+ info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+ if info_dict.get('is_live') and formats:
+ formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
+ if get_from_start and not formats:
+ self.raise_no_formats(info_dict, msg=(
+ '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
+ 'If you want to download from the current time, use --no-live-from-start'))
if not formats:
self.raise_no_formats(info_dict)
format['dynamic_range'] = 'SDR'
if (info_dict.get('duration') and format.get('tbr')
and not format.get('filesize') and not format.get('filesize_approx')):
- format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
+ format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
# Add HTTP headers, so that external programs can use them from the
# json output
if '__x_forwarded_for_ip' in info_dict:
del info_dict['__x_forwarded_for_ip']
- # TODO Central sorting goes here
-
if self.params.get('check_formats') is True:
- formats = LazyList(self._check_formats(formats[::-1])).reverse()
+ formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
if not formats or formats[0] is not info_dict:
# only set the 'formats' fields if the original info_dict list them
info_dict, _ = self.pre_process(info_dict)
+ if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
+ return info_dict
+
+ self.post_extract(info_dict)
+ info_dict, _ = self.pre_process(info_dict, 'after_filter')
+
+ # The pre-processors may have modified the formats
+ formats = info_dict.get('formats', [info_dict])
+
+ list_only = self.params.get('simulate') is None and (
+ self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
+ interactive_format_selection = not list_only and self.format_selector == '-'
if self.params.get('list_thumbnails'):
self.list_thumbnails(info_dict)
- if self.params.get('listformats'):
- if not info_dict.get('formats') and not info_dict.get('url'):
- self.to_screen('%s has no formats' % info_dict['id'])
- else:
- self.list_formats(info_dict)
if self.params.get('listsubtitles'):
if 'automatic_captions' in info_dict:
self.list_subtitles(
info_dict['id'], automatic_captions, 'automatic captions')
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
- list_only = self.params.get('simulate') is None and (
- self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
+ if self.params.get('listformats') or interactive_format_selection:
+ self.list_formats(info_dict)
if list_only:
# Without this printing, -F --print-json will not work
self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
- return
+ return info_dict
format_selector = self.format_selector
if format_selector is None:
self.write_debug('Default format spec: %s' % req_format)
format_selector = self.build_format_selector(req_format)
- # While in format selection we may need to have an access to the original
- # format set in order to calculate some metrics or do some processing.
- # For now we need to be able to guess whether original formats provided
- # by extractor are incomplete or not (i.e. whether extractor provides only
- # video-only or audio-only formats) for proper formats selection for
- # extractors with such incomplete formats (see
- # https://github.com/ytdl-org/youtube-dl/pull/5556).
- # Since formats may be filtered during format selection and may not match
- # the original formats the results may be incorrect. Thus original formats
- # or pre-calculated metrics should be passed to format selection routines
- # as well.
- # We will pass a context object containing all necessary additional data
- # instead of just formats.
- # This fixes incorrect format selection issue (see
- # https://github.com/ytdl-org/youtube-dl/issues/10083).
- incomplete_formats = (
- # All formats are video-only or
- all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
- # all formats are audio-only
- or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
-
- ctx = {
- 'formats': formats,
- 'incomplete_formats': incomplete_formats,
- }
+ while True:
+ if interactive_format_selection:
+ req_format = input(
+ self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
+ try:
+ format_selector = self.build_format_selector(req_format)
+ except SyntaxError as err:
+ self.report_error(err, tb=False, is_error=False)
+ continue
+
+ formats_to_download = list(format_selector({
+ 'formats': formats,
+ 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
+ 'incomplete_formats': (
+ # All formats are video-only or
+ all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
+ # all formats are audio-only
+ or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
+ }))
+ if interactive_format_selection and not formats_to_download:
+ self.report_error('Requested format is not available', tb=False, is_error=False)
+ continue
+ break
- formats_to_download = list(format_selector(ctx))
if not formats_to_download:
if not self.params.get('ignore_no_formats_error'):
- raise ExtractorError('Requested format is not available', expected=True,
- video_id=info_dict['id'], ie=info_dict['extractor'])
- else:
- self.report_warning('Requested format is not available')
- # Process what we can, even without any available formats.
- self.process_info(dict(info_dict))
- elif download:
- self.to_screen(
- '[info] %s: Downloading %d format(s): %s' % (
- info_dict['id'], len(formats_to_download),
- ", ".join([f['format_id'] for f in formats_to_download])))
- for fmt in formats_to_download:
- new_info = dict(info_dict)
- # Save a reference to the original info_dict so that it can be modified in process_info if needed
- new_info['__original_infodict'] = info_dict
+ raise ExtractorError(
+ 'Requested format is not available. Use --list-formats for a list of available formats',
+ expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
+ self.report_warning('Requested format is not available')
+ # Process what we can, even without any available formats.
+ formats_to_download = [{}]
+
+ requested_ranges = self.params.get('download_ranges')
+ if requested_ranges:
+ requested_ranges = tuple(requested_ranges(info_dict, self))
+
+ best_format, downloaded_formats = formats_to_download[-1], []
+ if download:
+ if best_format:
+ def to_screen(*msg):
+ self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
+
+ to_screen(f'Downloading {len(formats_to_download)} format(s):',
+ (f['format_id'] for f in formats_to_download))
+ if requested_ranges:
+ to_screen(f'Downloading {len(requested_ranges)} time ranges:',
+ (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
+ max_downloads_reached = False
+
+ for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
+ new_info = self._copy_infodict(info_dict)
new_info.update(fmt)
- self.process_info(new_info)
+ if chapter:
+ new_info.update({
+ 'section_start': chapter.get('start_time'),
+ 'section_end': chapter.get('end_time', 0),
+ 'section_title': chapter.get('title'),
+ 'section_number': chapter.get('index'),
+ })
+ downloaded_formats.append(new_info)
+ try:
+ self.process_info(new_info)
+ except MaxDownloadsReached:
+ max_downloads_reached = True
+ self._raise_pending_errors(new_info)
+ # Remove copied info
+ for key, val in tuple(new_info.items()):
+ if info_dict.get(key) == val:
+ new_info.pop(key)
+ if max_downloads_reached:
+ break
+
+ write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
+ assert write_archive.issubset({True, False, 'ignore'})
+ if True in write_archive and False not in write_archive:
+ self.record_download_archive(info_dict)
+
+ info_dict['requested_downloads'] = downloaded_formats
+ info_dict = self.run_all_pps('after_video', info_dict)
+ if max_downloads_reached:
+ raise MaxDownloadsReached()
+
# We update the info dict with the selected best quality format (backwards compatibility)
- if formats_to_download:
- info_dict.update(formats_to_download[-1])
+ info_dict.update(best_format)
return info_dict
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
"""Select the requested subtitles and their format"""
- available_subs = {}
+ available_subs, normal_sub_langs = {}, []
if normal_subtitles and self.params.get('writesubtitles'):
available_subs.update(normal_subtitles)
+ normal_sub_langs = tuple(normal_subtitles.keys())
if automatic_captions and self.params.get('writeautomaticsub'):
for lang, cap_info in automatic_captions.items():
if lang not in available_subs:
available_subs):
return None
- all_sub_langs = available_subs.keys()
+ all_sub_langs = tuple(available_subs.keys())
if self.params.get('allsubtitles', False):
requested_langs = all_sub_langs
elif self.params.get('subtitleslangs', False):
# given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
requested_langs = []
for lang_re in self.params.get('subtitleslangs'):
- if lang_re == 'all':
- requested_langs.extend(all_sub_langs)
- continue
discard = lang_re[0] == '-'
if discard:
lang_re = lang_re[1:]
+ if lang_re == 'all':
+ if discard:
+ requested_langs = []
+ else:
+ requested_langs.extend(all_sub_langs)
+ continue
current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
if discard:
for lang in current_langs:
else:
requested_langs.extend(current_langs)
requested_langs = orderedSet(requested_langs)
- elif 'en' in available_subs:
- requested_langs = ['en']
+ elif normal_sub_langs:
+ requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
else:
- requested_langs = [list(all_sub_langs)[0]]
+ requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
if requested_langs:
self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
for lang in requested_langs:
formats = available_subs.get(lang)
if formats is None:
- self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+ self.report_warning(f'{lang} subtitles not available for {video_id}')
continue
for ext in formats_preference:
if ext == 'best':
subs[lang] = f
return subs
+ def _forceprint(self, key, info_dict):
+ if info_dict is None:
+ return
+ info_copy = info_dict.copy()
+ info_copy['formats_table'] = self.render_formats_table(info_dict)
+ info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
+ info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
+ info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
+
+ def format_tmpl(tmpl):
+ mobj = re.match(r'\w+(=?)$', tmpl)
+ if mobj and mobj.group(1):
+ return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
+ elif mobj:
+ return f'%({tmpl})s'
+ return tmpl
+
+ for tmpl in self.params['forceprint'].get(key, []):
+ self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
+
+ for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
+ filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
+ tmpl = format_tmpl(tmpl)
+ self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
+ if self._ensure_dir_exists(filename):
+ with open(filename, 'a', encoding='utf-8') as f:
+ f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
+
def __forced_printings(self, info_dict, filename, incomplete):
def print_mandatory(field, actual_field=None):
if actual_field is None:
if info_dict.get('requested_formats') is not None:
# For RTMP URLs, also include the playpath
info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
- elif 'url' in info_dict:
+ elif info_dict.get('url'):
info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
- if self.params.get('forceprint') or self.params.get('forcejson'):
+ if (self.params.get('forcejson')
+ or self.params['forceprint'].get('video')
+ or self.params['print_to_file'].get('video')):
self.post_extract(info_dict)
- for tmpl in self.params.get('forceprint', []):
- mobj = re.match(r'\w+(=?)$', tmpl)
- if mobj and mobj.group(1):
- tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
- elif mobj:
- tmpl = '%({})s'.format(tmpl)
- self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
+ self._forceprint('video', info_dict)
print_mandatory('title')
print_mandatory('id')
if not test:
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
- urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
- self.write_debug('Invoking downloader on "%s"' % urls)
-
- new_info = copy.deepcopy(self._copy_infodict(info))
+ urls = '", "'.join(
+ (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
+ for f in info.get('requested_formats', []) or [info])
+ self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
+
+ # Note: Ideally info should be deep-copied so that hooks cannot modify it.
+ # But it may contain objects that are not deep-copyable
+ new_info = self._copy_infodict(info)
if new_info.get('http_headers') is None:
new_info['http_headers'] = self._calc_headers(new_info)
return fd.download(name, new_info, subtitle)
- def process_info(self, info_dict):
- """Process a single resolved IE result."""
def existing_file(self, filepaths, *, default_overwrite=True):
    """Return an already existing file among *filepaths*, or clear them all.

    When at least one path exists and the 'overwrites' param (falling back
    to *default_overwrite*) is falsy, the first existing path is returned.
    Otherwise every existing path is deleted and None is returned.
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if found and not self.params.get('overwrites', default_overwrite):
        return found[0]

    # Overwriting is allowed (or nothing exists): remove the stale files
    for path in found:
        self.report_file_delete(path)
        os.remove(path)
    return None
- max_downloads = self.params.get('max_downloads')
- if max_downloads is not None:
- if self._num_downloads >= int(max_downloads):
- raise MaxDownloadsReached()
+ def process_info(self, info_dict):
+ """Process a single resolved IE result. (Modifies it in-place)"""
- # TODO: backward compatibility, to be removed
- info_dict['fulltitle'] = info_dict['title']
+ assert info_dict.get('_type', 'video') == 'video'
+ original_infodict = info_dict
if 'format' not in info_dict and 'ext' in info_dict:
info_dict['format'] = info_dict['ext']
+ # This is mostly just for backward compatibility of process_info
+ # As a side-effect, this allows for format-specific filters
if self._match_entry(info_dict) is not None:
+ info_dict['__write_download_archive'] = 'ignore'
return
+ # Does nothing under normal operation - for backward compatibility of process_info
self.post_extract(info_dict)
self._num_downloads += 1
# Forced printings
self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
+ def check_max_downloads():
+ if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
+ raise MaxDownloadsReached()
+
if self.params.get('simulate'):
- if self.params.get('force_write_download_archive', False):
- self.record_download_archive(info_dict)
- # Do nothing else if in simulate mode
+ info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
+ check_max_downloads()
return
if full_filename is None:
infofn = self.prepare_filename(info_dict, 'infojson')
_infojson_written = self._write_info_json('video', info_dict, infofn)
if _infojson_written:
+ info_dict['infojson_filename'] = infofn
+ # For backward compatibility, even though it was a private field
info_dict['__infojson_filename'] = infofn
elif _infojson_written is None:
return
else:
try:
self.to_screen('[info] Writing video annotations to: ' + annofn)
- with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+ with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
annofile.write(info_dict['annotations'])
except (KeyError, TypeError):
self.report_warning('There are no annotations to write.')
- except (OSError, IOError):
+ except OSError:
self.report_error('Cannot write annotations file: ' + annofn)
return
# Write internet shortcut files
def _write_link_file(link_type):
- if 'webpage_url' not in info_dict:
- self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
- return False
+ url = try_get(info_dict['webpage_url'], iri_to_uri)
+ if not url:
+ self.report_warning(
+ f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
+ return True
linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
+ if not self._ensure_dir_exists(encodeFilename(linkfn)):
+ return False
if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
return True
try:
self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
- with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
- newline='\r\n' if link_type == 'url' else '\n') as linkfile:
- template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
+ with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
+ newline='\r\n' if link_type == 'url' else '\n') as linkfile:
+ template_vars = {'url': url}
if link_type == 'desktop':
template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
- except (OSError, IOError):
+ except OSError:
self.report_error(f'Cannot write internet shortcut {linkfn}')
return False
return True
for link_type, should_write in write_links.items()):
return
- try:
- info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
- except PostProcessingError as err:
- self.report_error('Preprocessing: %s' % str(err))
- return
+ def replace_info_dict(new_info):
+ nonlocal info_dict
+ if new_info == info_dict:
+ return
+ info_dict.clear()
+ info_dict.update(new_info)
- must_record_download_archive = False
- if self.params.get('skip_download', False):
+ new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
+ replace_info_dict(new_info)
+
+ if self.params.get('skip_download'):
info_dict['filepath'] = temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
info_dict['__files_to_move'] = files_to_move
- info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
+ replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
+ info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
else:
# Download
info_dict.setdefault('__postprocessors', [])
try:
- def existing_file(*filepaths):
+ def existing_video_file(*filepaths):
ext = info_dict.get('ext')
- final_ext = self.params.get('final_ext', ext)
- existing_files = []
- for file in orderedSet(filepaths):
- if final_ext != ext:
- converted = replace_extension(file, final_ext, ext)
- if os.path.exists(encodeFilename(converted)):
- existing_files.append(converted)
- if os.path.exists(encodeFilename(file)):
- existing_files.append(file)
-
- if not existing_files or self.params.get('overwrites', False):
- for file in orderedSet(existing_files):
- self.report_file_delete(file)
- os.remove(encodeFilename(file))
- return None
-
- info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
- return existing_files[0]
+ converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
+ file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
+ default_overwrite=False)
+ if file:
+ info_dict['ext'] = os.path.splitext(file)[1][1:]
+ return file
success = True
+ merger, fd = FFmpegMergerPP(self), None
+ if info_dict.get('protocol') or info_dict.get('url'):
+ fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
+ if fd is not FFmpegFD and (
+ info_dict.get('section_start') or info_dict.get('section_end')):
+ msg = ('This format cannot be partially downloaded' if merger.available
+ else 'You have requested downloading the video partially, but ffmpeg is not installed')
+ self.report_error(f'{msg}. Aborting')
+ return
+
if info_dict.get('requested_formats') is not None:
def compatible_formats(formats):
return False
# Check extension
- exts = set(format.get('ext') for format in formats)
+ exts = {format.get('ext') for format in formats}
COMPATIBLE_EXTS = (
- set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
- set(('webm',)),
+ {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
+ {'webm'},
)
for ext_sets in COMPATIBLE_EXTS:
if ext_sets.issuperset(exts):
and info_dict.get('thumbnails')
# check with type instead of pp_key, __name__, or isinstance
# since we dont want any custom PPs to trigger this
- and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
+ and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
info_dict['ext'] = 'mkv'
self.report_warning(
'webm doesn\'t support embedding a thumbnail, mkv will be used')
os.path.splitext(filename)[0]
if filename_real_ext in (old_ext, new_ext)
else filename)
- return '%s.%s' % (filename_wo_ext, ext)
+ return f'{filename_wo_ext}.{ext}'
# Ensure filename always has a correct extension for successful merge
full_filename = correct_ext(full_filename)
temp_filename = correct_ext(temp_filename)
- dl_filename = existing_file(full_filename, temp_filename)
+ dl_filename = existing_video_file(full_filename, temp_filename)
info_dict['__real_download'] = False
+ downloaded = []
if dl_filename is not None:
self.report_file_already_downloaded(dl_filename)
- elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
+ elif fd:
+ for f in requested_formats if fd != FFmpegFD else []:
+ f['filepath'] = fname = prepend_extension(
+ correct_ext(temp_filename, info_dict['ext']),
+ 'f%s' % f['format_id'], info_dict['ext'])
+ downloaded.append(fname)
info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
success, real_download = self.dl(temp_filename, info_dict)
info_dict['__real_download'] = real_download
else:
- downloaded = []
- merger = FFmpegMergerPP(self)
if self.params.get('allow_unplayable_formats'):
self.report_warning(
'You have requested merging of multiple formats '
'while also allowing unplayable formats to be downloaded. '
'The formats won\'t be merged to prevent data corruption.')
elif not merger.available:
- self.report_warning(
- 'You have requested merging of multiple formats but ffmpeg is not installed. '
- 'The formats won\'t be merged.')
+ msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
+ if not self.params.get('ignoreerrors'):
+ self.report_error(f'{msg}. Aborting due to --abort-on-error')
+ return
+ self.report_warning(f'{msg}. The formats won\'t be merged')
if temp_filename == '-':
- reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
+ reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
else 'but the formats are incompatible for simultaneous download' if merger.available
else 'but ffmpeg is not installed')
self.report_warning(
partial_success, real_download = self.dl(fname, new_info)
info_dict['__real_download'] = info_dict['__real_download'] or real_download
success = success and partial_success
- if merger.available and not self.params.get('allow_unplayable_formats'):
- info_dict['__postprocessors'].append(merger)
- info_dict['__files_to_merge'] = downloaded
- # Even if there were no downloads, it is being merged only now
- info_dict['__real_download'] = True
- else:
- for file in downloaded:
- files_to_move[file] = None
+
+ if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
+ info_dict['__postprocessors'].append(merger)
+ info_dict['__files_to_merge'] = downloaded
+ # Even if there were no downloads, it is being merged only now
+ info_dict['__real_download'] = True
+ else:
+ for file in downloaded:
+ files_to_move[file] = None
else:
# Just a single file
- dl_filename = existing_file(full_filename, temp_filename)
+ dl_filename = existing_video_file(full_filename, temp_filename)
if dl_filename is None or dl_filename == temp_filename:
# dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
# So we should try to resume the download
except network_exceptions as err:
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
return
- except (OSError, IOError) as err:
+ except OSError as err:
raise UnavailableVideoError(err)
except (ContentTooShortError, ) as err:
- self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+ self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
return
+ self._raise_pending_errors(info_dict)
if success and full_filename != '-':
def fixup():
if fixup_policy in ('ignore', 'never'):
return
elif fixup_policy == 'warn':
- do_fixup = False
+ do_fixup = 'warn'
elif fixup_policy != 'force':
assert fixup_policy in ('detect_or_warn', None)
if not info_dict.get('__real_download'):
do_fixup = False
def ffmpeg_fixup(cndn, msg, cls):
- if not cndn:
+ if not (do_fixup and cndn):
return
- if not do_fixup:
+ elif do_fixup == 'warn':
self.report_warning(f'{vid}: {msg}')
return
pp = cls(self)
FFmpegFixupM4aPP)
downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
- downloader = downloader.__name__ if downloader else None
- ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
- 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
- ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
- ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
+ downloader = downloader.FD_NAME if downloader else None
+
+ if info_dict.get('requested_formats') is None: # Not necessary if doing merger
+ ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
+ or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
+ 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
+ FFmpegFixupM3u8PP)
+ ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
+ 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
+
+ ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
+ ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
fixup()
try:
- info_dict = self.post_process(dl_filename, info_dict, files_to_move)
+ replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
except PostProcessingError as err:
self.report_error('Postprocessing: %s' % str(err))
return
except Exception as err:
self.report_error('post hooks: %s' % str(err))
return
- must_record_download_archive = True
+ info_dict['__write_download_archive'] = True
- if must_record_download_archive or self.params.get('force_write_download_archive', False):
- self.record_download_archive(info_dict)
- max_downloads = self.params.get('max_downloads')
- if max_downloads is not None and self._num_downloads >= int(max_downloads):
- raise MaxDownloadsReached()
+ assert info_dict is original_infodict # Make sure the info_dict was modified in-place
+ if self.params.get('force_write_download_archive'):
+ info_dict['__write_download_archive'] = True
+ check_max_downloads()
+
+ def __download_wrapper(self, func):
+ """
+ Return a wrapper around `func` that applies the shared download error handling.
+
+ UnavailableVideoError is passed to report_error and not re-raised.
+ DownloadCancelled is printed with to_screen and re-raised unless the
+ 'break_per_url' param is set.
+ If 'dump_single_json' is set, the result of `func` is run through post_extract
+ and sanitize_info and written to stdout as JSON.
+ The wrapper has no return statement of its own for the result of `func`.
+ """
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ try:
+ res = func(*args, **kwargs)
+ except UnavailableVideoError as e:
+ self.report_error(e)
+ except DownloadCancelled as e:
+ self.to_screen(f'[info] {e}')
+ if not self.params.get('break_per_url'):
+ raise
+ # NOTE(review): indentation is not visible in this view; this `else` presumably pairs
+ # with the `try` (success path), since `res` is only bound there - confirm
+ else:
+ if self.params.get('dump_single_json', False):
+ self.post_extract(res)
+ self.to_stdout(json.dumps(self.sanitize_info(res)))
+ return wrapper
def download(self, url_list):
"""Download a given list of URLs."""
+ url_list = variadic(url_list) # Passing a single URL is a common mistake
outtmpl = self.outtmpl_dict['default']
if (len(url_list) > 1
and outtmpl != '-'
raise SameFileError(outtmpl)
for url in url_list:
- try:
- # It also downloads the videos
- res = self.extract_info(
- url, force_generic_extractor=self.params.get('force_generic_extractor', False))
- except UnavailableVideoError:
- self.report_error('unable to download video')
- except MaxDownloadsReached:
- self.to_screen('[info] Maximum number of downloads reached')
- raise
- except ExistingVideoReached:
- self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
- raise
- except RejectedVideoReached:
- self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
- raise
- else:
- if self.params.get('dump_single_json', False):
- self.post_extract(res)
- self.to_stdout(json.dumps(self.sanitize_info(res)))
+ self.__download_wrapper(self.extract_info)(
+ url, force_generic_extractor=self.params.get('force_generic_extractor', False))
return self._download_retcode
# FileInput doesn't have a read method, we can't call json.load
info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
try:
- self.process_ie_result(info, download=True)
- except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
+ self.__download_wrapper(self.process_ie_result)(info, download=True)
+ except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
+ if not isinstance(e, EntryNotInPlaylist):
+ self.to_stderr('\r')
webpage_url = info.get('webpage_url')
if webpage_url is not None:
- self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
+ self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
return self.download([webpage_url])
else:
raise
if info_dict is None:
return info_dict
info_dict.setdefault('epoch', int(time.time()))
- remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
- keep_keys = ['_type'], # Always keep this to facilitate load-info-json
+ info_dict.setdefault('_type', 'video')
+
if remove_private_keys:
- remove_keys |= {
- 'requested_formats', 'requested_subtitles', 'requested_entries',
- 'filepath', 'entries', 'original_url', 'playlist_autonumber',
+ reject = lambda k, v: v is None or k.startswith('__') or k in {
+ 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
+ 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
}
- empty_values = (None, {}, [], set(), tuple())
- reject = lambda k, v: k not in keep_keys and (
- k.startswith('_') or k in remove_keys or v in empty_values)
else:
- reject = lambda k, v: k in remove_keys
- filter_fn = lambda obj: (
- list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
- else obj if not isinstance(obj, dict)
- else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
+ reject = lambda k, v: False
+
+ def filter_fn(obj):
+ if isinstance(obj, dict):
+ return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
+ elif isinstance(obj, (list, tuple, set, LazyList)):
+ return list(map(filter_fn, obj))
+ elif obj is None or isinstance(obj, (str, int, float, bool)):
+ return obj
+ else:
+ return repr(obj)
+
return filter_fn(info_dict)
@staticmethod
''' Alias of sanitize_info for backward compatibility '''
return YoutubeDL.sanitize_info(info_dict, actually_filter)
+ def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
+ """
+ Delete the given files from disk and forget them in info['__files_to_move'].
+
+ files_to_delete: paths to remove; falsy entries are skipped and duplicates collapsed
+ info: dict whose '__files_to_move' entry (if present) loses each processed filename
+ msg: optional %-style template; formatted with the filename and printed before removal
+
+ An OSError raised by os.remove is reported as a warning instead of being propagated.
+ """
+ # NOTE(review): `info={}` is a mutable default, but on the default path it is only read
+ # (`.get` returns a fresh list, so the `del` below is never reached)
+ for filename in set(filter(None, files_to_delete)):
+ if msg:
+ self.to_screen(msg % filename)
+ try:
+ os.remove(filename)
+ except OSError:
+ self.report_warning(f'Unable to delete file {filename}')
+ if filename in info.get('__files_to_move', []): # NB: Delete even if None
+ del info['__files_to_move'][filename]
+
+ @staticmethod
+ def post_extract(info_dict):
+ """
+ Run the deferred '__post_extractor' callable of `info_dict` and merge its result in place.
+
+ For a '_type' of 'playlist' or 'multi_video', each item of 'entries' is processed
+ recursively instead and the container dict itself is left untouched.
+ A None `info_dict` (or a None entry) is treated as an empty dict.
+ """
+ def actual_post_extract(info_dict):
+ if info_dict.get('_type') in ('playlist', 'multi_video'):
+ for video_dict in info_dict.get('entries', {}):
+ actual_post_extract(video_dict or {})
+ return
+
+ # The key is popped, so the callable is not run again on a later call for the same dict
+ post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
+ info_dict.update(post_extractor())
+
+ actual_post_extract(info_dict or {})
+
def run_pp(self, pp, infodict):
files_to_delete = []
if '__files_to_move' not in infodict:
for f in files_to_delete:
infodict['__files_to_move'].setdefault(f, '')
else:
- for old_filename in set(files_to_delete):
- self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
- try:
- os.remove(encodeFilename(old_filename))
- except (IOError, OSError):
- self.report_warning('Unable to remove downloaded original file')
- if old_filename in infodict['__files_to_move']:
- del infodict['__files_to_move'][old_filename]
+ self._delete_downloaded_files(
+ *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
return infodict
- @staticmethod
- def post_extract(info_dict):
- def actual_post_extract(info_dict):
- if info_dict.get('_type') in ('playlist', 'multi_video'):
- for video_dict in info_dict.get('entries', {}):
- actual_post_extract(video_dict or {})
- return
-
- post_extractor = info_dict.get('__post_extractor') or (lambda: {})
- extra = post_extractor().items()
- info_dict.update(extra)
- info_dict.pop('__post_extractor', None)
-
- original_infodict = info_dict.get('__original_infodict') or {}
- original_infodict.update(extra)
- original_infodict.pop('__post_extractor', None)
-
- actual_post_extract(info_dict or {})
+ def run_all_pps(self, key, info, *, additional_pps=None):
+ """
+ Run `additional_pps` followed by the postprocessors in self._pps[key], in that order.
+
+ self._forceprint(key, info) is called first. Every postprocessor is applied through
+ run_pp, and the dict returned by one run is fed to the next. Returns the final dict.
+ """
+ self._forceprint(key, info)
+ for pp in (additional_pps or []) + self._pps[key]:
+ info = self.run_pp(pp, info)
+ return info
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
+ """
+ Run the postprocessors registered under `key` on a shallow copy of `ie_info`.
+
+ A PostProcessingError is not propagated: its message is stored under '__pending_error'
+ (unless that key is already set) and passed to report_error with is_error=False.
+ Returns a tuple (info, files_to_move); the second item is popped from info['__files_to_move'].
+ """
info = dict(ie_info)
info['__files_to_move'] = files_to_move or {}
- for pp in self._pps[key]:
- info = self.run_pp(pp, info)
+ try:
+ info = self.run_all_pps(key, info)
+ except PostProcessingError as err:
+ msg = f'Preprocessing: {err}'
+ info.setdefault('__pending_error', msg)
+ self.report_error(msg, is_error=False)
return info, info.pop('__files_to_move', None)
- def post_process(self, filename, ie_info, files_to_move=None):
+ def post_process(self, filename, info, files_to_move=None):
"""Run all the postprocessors on the given file."""
- info = dict(ie_info)
+ # `info` is written to directly; no copy of the caller's dict is made here
info['filepath'] = filename
info['__files_to_move'] = files_to_move or {}
-
- for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
- info = self.run_pp(pp, info)
+ # Postprocessors listed in info['__postprocessors'] run before the registered 'post_process' ones
+ info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
+ # MoveFilesAfterDownloadPP runs after every 'post_process' postprocessor and before the 'after_move' ones
info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
del info['__files_to_move']
- for pp in self._pps['after_move']:
- info = self.run_pp(pp, info)
- return info
+ return self.run_all_pps('after_move', info)
def _make_archive_id(self, info_dict):
video_id = info_dict.get('id')
break
else:
return
- return '%s %s' % (extractor.lower(), video_id)
+ return f'{extractor.lower()} {video_id}'
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
return
vid_id = self._make_archive_id(info_dict)
assert vid_id
+ self.write_debug(f'Adding to archive: {vid_id}')
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
archive_file.write(vid_id + '\n')
self.archive.add(vid_id)
@staticmethod
def format_resolution(format, default='unknown'):
+ """
+ Return a short resolution string for the format dict `format`.
+
+ Checked in order: 'audio only' when vcodec is 'none' and acodec is not;
+ the format's own 'resolution' value; 'WxH'; 'Hp' (height only);
+ 'Wx?' (width only); otherwise `default`.
+ """
- is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
return 'audio only'
if format.get('resolution') is not None:
return format['resolution']
if format.get('width') and format.get('height'):
- res = '%dx%d' % (format['width'], format['height'])
+ return '%dx%d' % (format['width'], format['height'])
elif format.get('height'):
- res = '%sp' % format['height']
+ return '%sp' % format['height']
elif format.get('width'):
- res = '%dx?' % format['width']
- elif is_images:
- return 'images'
- else:
- return default
- return f'{res} images' if is_images else res
+ return '%dx?' % format['width']
+ return default
+
+ def _list_format_headers(self, *headers):
+ """
+ Return the table header cells for the listing tables.
+
+ Unless the 'listformats_table' param is exactly False, each header is passed through
+ self._format_out with self.Styles.HEADERS and a list is returned; otherwise the
+ `headers` tuple is returned unchanged.
+ """
+ if self.params.get('listformats_table', True) is not False:
+ return [self._format_out(header, self.Styles.HEADERS) for header in headers]
+ return headers
def _format_note(self, fdict):
res = ''
if fdict.get('ext') in ['f4f', 'f4m']:
- res += '(unsupported) '
+ res += '(unsupported)'
if fdict.get('language'):
if res:
res += ' '
- res += '[%s] ' % fdict['language']
+ res += '[%s]' % fdict['language']
if fdict.get('format_note') is not None:
- res += fdict['format_note'] + ' '
+ if res:
+ res += ' '
+ res += fdict['format_note']
if fdict.get('tbr') is not None:
- res += '%4dk ' % fdict['tbr']
+ if res:
+ res += ', '
+ res += '%4dk' % fdict['tbr']
if fdict.get('container') is not None:
if res:
res += ', '
res += '~' + format_bytes(fdict['filesize_approx'])
return res
- def _list_format_headers(self, *headers):
- if self.params.get('listformats_table', True) is not False:
- return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
- return headers
+ def render_formats_table(self, info_dict):
+ """
+ Build the table of available formats for `info_dict` and return the result of render_table.
+
+ Returns None when `info_dict` has neither 'formats' nor 'url'.
+ When the 'formats' key is absent, `info_dict` itself is treated as the only format.
+ Formats whose 'preference' is below -1000 are left out of the table.
+ A simple four-column table is produced when the 'listformats_table' param is exactly
+ False; otherwise the detailed, styled table is produced.
+ """
+ if not info_dict.get('formats') and not info_dict.get('url'):
+ return None
- def list_formats(self, info_dict):
formats = info_dict.get('formats', [info_dict])
- new_format = self.params.get('listformats_table', True) is not False
- if new_format:
- tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats))
- vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats))
- abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats))
- delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
- table = [
- [
- self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
- format_field(f, 'ext'),
- self.format_resolution(f),
- format_field(f, 'fps', '%d'),
- format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
- delim,
- format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
- format_field(f, 'tbr', f'%{tbr_digits}dk'),
- shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
- delim,
- format_field(f, 'vcodec', default='unknown').replace('none', ''),
- format_field(f, 'vbr', f'%{vbr_digits}dk'),
- format_field(f, 'acodec', default='unknown').replace('none', ''),
- format_field(f, 'abr', f'%{abr_digits}dk'),
- format_field(f, 'asr', '%5dHz'),
- ', '.join(filter(None, (
- self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else '',
- format_field(f, 'language', '[%s]'),
- format_field(f, 'format_note'),
- format_field(f, 'container', ignore=(None, f.get('ext'))),
- ))),
- ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
- header_line = self._list_format_headers(
- 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', ' TBR', 'PROTO',
- delim, 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO')
- else:
+ # `not X is not False` parses as `not (X is not False)`, so this branch is taken
+ # only when the 'listformats_table' param is exactly False
+ if not self.params.get('listformats_table', True) is not False:
table = [
[
format_field(f, 'format_id'),
format_field(f, 'ext'),
self.format_resolution(f),
- self._format_note(f)]
- for f in formats
- if f.get('preference') is None or f['preference'] >= -1000]
- header_line = ['format code', 'extension', 'resolution', 'note']
-
- self.to_screen(
- '[info] Available formats for %s:' % info_dict['id'])
- self.to_stdout(render_table(
- header_line, table,
- extraGap=(0 if new_format else 1),
- hideEmpty=new_format,
- delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
-
- def list_thumbnails(self, info_dict):
- thumbnails = list(info_dict.get('thumbnails'))
+ self._format_note(f)
+ ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
+ return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
+
+ # Detailed table: one row per format, with `delim` cells separating the column groups
+ # NOTE(review): the leading '\t' in several templates is presumably an alignment marker
+ # interpreted by render_table - confirm against its implementation
+ delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
+ table = [
+ [
+ self._format_out(format_field(f, 'format_id'), self.Styles.ID),
+ format_field(f, 'ext'),
+ format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
+ format_field(f, 'fps', '\t%d'),
+ format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
+ delim,
+ format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
+ format_field(f, 'tbr', '\t%dk'),
+ shorten_protocol_name(f.get('protocol', '')),
+ delim,
+ format_field(f, 'vcodec', default='unknown').replace(
+ 'none', 'images' if f.get('acodec') == 'none'
+ else self._format_out('audio only', self.Styles.SUPPRESS)),
+ format_field(f, 'vbr', '\t%dk'),
+ format_field(f, 'acodec', default='unknown').replace(
+ 'none', '' if f.get('vcodec') == 'none'
+ else self._format_out('video only', self.Styles.SUPPRESS)),
+ format_field(f, 'abr', '\t%dk'),
+ format_field(f, 'asr', '\t%dHz'),
+ join_nonempty(
+ self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
+ format_field(f, 'language', '[%s]'),
+ join_nonempty(format_field(f, 'format_note'),
+ format_field(f, 'container', ignore=(None, f.get('ext'))),
+ delim=', '),
+ delim=' '),
+ ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
+ header_line = self._list_format_headers(
+ 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
+ delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
+
+ return render_table(
+ header_line, table, hide_empty=True,
+ delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
+
+ def render_thumbnails_table(self, info_dict):
+ """
+ Build the thumbnails table (ID, Width, Height, URL) for `info_dict` via render_table.
+
+ Returns None when 'thumbnails' is missing or empty. Missing 'width'/'height' are shown
+ as 'unknown', a missing 'id' yields None, and 'url' is required on every thumbnail.
+ """
+ thumbnails = list(info_dict.get('thumbnails') or [])
if not thumbnails:
- self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
- return
-
- self.to_screen(
- '[info] Thumbnails for %s:' % info_dict['id'])
- self.to_stdout(render_table(
+ return None
+ return render_table(
self._list_format_headers('ID', 'Width', 'Height', 'URL'),
- [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
-
- def list_subtitles(self, video_id, subtitles, name='subtitles'):
- if not subtitles:
- self.to_screen('%s has no %s' % (video_id, name))
- return
- self.to_screen(
- 'Available %s for %s:' % (name, video_id))
+ [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
+ def render_subtitles_table(self, video_id, subtitles):
+ """
+ Build the subtitles table (Language, Name, Formats) from the `subtitles` mapping via render_table.
+
+ `subtitles` maps a language to a list of format dicts; None is returned when it is empty.
+ `video_id` is not used in the body of this method.
+ """
def _row(lang, formats):
+ # Formats are listed in reverse order; a name missing or empty is shown as 'unknown'
exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
+ # When every format shares one name, show it once (or nothing if it is 'unknown')
if len(set(names)) == 1:
names = [] if names[0] == 'unknown' else names[:1]
return [lang, ', '.join(names), ', '.join(exts)]
- self.to_stdout(render_table(
+ if not subtitles:
+ return None
+ return render_table(
self._list_format_headers('Language', 'Name', 'Formats'),
[_row(lang, formats) for lang, formats in subtitles.items()],
- hideEmpty=True))
+ hide_empty=True)
+
+ def __list_table(self, video_id, name, func, *args):
+ """
+ Print the table produced by func(*args), or a notice when there is none.
+
+ A falsy result makes to_screen print '<video_id> has no <name>'. Otherwise a heading
+ is printed with to_screen and the table itself is written with to_stdout.
+ """
+ table = func(*args)
+ if not table:
+ self.to_screen(f'{video_id} has no {name}')
+ return
+ self.to_screen(f'[info] Available {name} for {video_id}:')
+ self.to_stdout(table)
+
+ def list_formats(self, info_dict):
+ """Print the table from render_formats_table for `info_dict`, labelled with info_dict['id']."""
+ self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
+
+ def list_thumbnails(self, info_dict):
+ """Print the table from render_thumbnails_table for `info_dict`, labelled with info_dict['id']."""
+ self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
+
+ def list_subtitles(self, video_id, subtitles, name='subtitles'):
+ """
+ Print the table from render_subtitles_table for `subtitles`.
+
+ `name` is only used as the label in the printed messages.
+ """
+ self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
def urlopen(self, req):
""" Start an HTTP download """
- if isinstance(req, compat_basestring):
+ # A plain string is first passed through sanitized_Request; anything else is used as given
+ if isinstance(req, str):
req = sanitized_Request(req)
+ # Opened with this instance's opener, using self._socket_timeout as the timeout
return self._opener.open(req, timeout=self._socket_timeout)
if not self.params.get('verbose'):
return
+ # These imports can be slow. So import them only as needed
+ from .extractor.extractors import _LAZY_LOADER
+ from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
+
def get_encoding(stream):
- ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
+ ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
if not supports_terminal_sequences(stream):
- ret += ' (No ANSI)'
+ from .utils import WINDOWS_VT_MODE # Must be imported locally
+ ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
return ret
- encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
+ encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
locale.getpreferredencoding(),
sys.getfilesystemencoding(),
- get_encoding(self._screen_file), get_encoding(self._err_file),
- self.get_encoding())
+ self.get_encoding(),
+ ', '.join(
+ f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
+ if stream is not None and key != 'console')
+ )
logger = self.params.get('logger')
if logger:
write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
source = detect_variant()
- write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
+ write_debug(join_nonempty(
+ 'yt-dlp version', __version__,
+ f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
+ '' if source == 'unknown' else f'({source})',
+ delim=' '))
if not _LAZY_LOADER:
if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
write_debug('Lazy loading extractors is forcibly disabled')
write_debug('Plugins: %s' % [
'%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
- if self.params.get('compat_opts'):
- write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
- try:
- sp = Popen(
- ['git', 'rev-parse', '--short', 'HEAD'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate_or_kill()
- out = out.decode().strip()
- if re.match('[0-9a-f]+', out):
- write_debug('Git HEAD: %s' % out)
- except Exception:
+ if self.params['compat_opts']:
+ write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
+
+ if source == 'source':
try:
- sys.exc_clear()
+ stdout, _, _ = Popen.run(
+ ['git', 'rev-parse', '--short', 'HEAD'],
+ text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ if re.fullmatch('[0-9a-f]+', stdout.strip()):
+ write_debug(f'Git HEAD: {stdout.strip()}')
except Exception:
- pass
+ with contextlib.suppress(Exception):
+ sys.exc_clear()
def python_implementation():
impl_name = platform.python_implementation()
platform.architecture()[0],
platform_name()))
- exe_versions = FFmpegPostProcessor.get_versions(self)
+ exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
+ ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
+ if ffmpeg_features:
+ exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
+
exe_versions['rtmpdump'] = rtmpdump_version()
exe_versions['phantomjs'] = PhantomJSwrapper._version()
exe_str = ', '.join(
) or 'none'
write_debug('exe versions: %s' % exe_str)
- from .downloader.websocket import has_websockets
- from .postprocessor.embedthumbnail import has_mutagen
- from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
+ from .compat.compat_utils import get_package_info
+ from .dependencies import available_dependencies
- lib_str = ', '.join(sorted(filter(None, (
- compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
- has_websockets and 'websockets',
- has_mutagen and 'mutagen',
- SQLITE_AVAILABLE and 'sqlite',
- KEYRING_AVAILABLE and 'keyring',
- )))) or 'none'
- write_debug('Optional libraries: %s' % lib_str)
+ write_debug('Optional libraries: %s' % (', '.join(sorted({
+ join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
+ })) or 'none'))
+ self._setup_opener()
proxy_map = {}
for handler in self._opener.handlers:
if hasattr(handler, 'proxies'):
# Not implemented
if False and self.params.get('call_home'):
- ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
+ ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
write_debug('Public IP address: %s' % ipaddr)
latest_version = self.urlopen(
- 'https://yt-dl.org/latest/version').read().decode('utf-8')
+ 'https://yt-dl.org/latest/version').read().decode()
if version_tuple(latest_version) > version_tuple(__version__):
self.report_warning(
'You are using an outdated version (newest version: %s)! '
latest_version)
def _setup_opener(self):
+ if hasattr(self, '_opener'):
+ return
timeout_val = self.params.get('socket_timeout')
self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
redirect_handler = YoutubeDLRedirectHandler()
- data_handler = compat_urllib_request_DataHandler()
+ data_handler = urllib.request.DataHandler()
# When passing our own FileHandler instance, build_opener won't add the
# default FileHandler and allows us to disable the file protocol, which
encoding = preferredencoding()
return encoding
- def _write_info_json(self, label, ie_result, infofn):
- ''' Write infojson and returns True = written, False = skip, None = error '''
+ def _write_info_json(self, label, ie_result, infofn, overwrite=None):
+ ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
+ # overwrite=None means "follow the 'overwrites' param", which itself defaults to True
+ if overwrite is None:
+ overwrite = self.params.get('overwrites', True)
+ # Skipped (False) when 'writeinfojson' is not set or no target filename was given
if not self.params.get('writeinfojson'):
return False
elif not infofn:
return False
+ # A failure reported by _ensure_dir_exists counts as an error (None)
elif not self._ensure_dir_exists(infofn):
return None
- elif not self.params.get('overwrites', True) and os.path.exists(infofn):
+ elif not overwrite and os.path.exists(infofn):
self.to_screen(f'[info] {label.title()} metadata is already present')
- else:
- self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
- try:
- write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
- except (OSError, IOError):
- self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
- return None
- return True
+ return 'exists'
+
+ # The info is passed through sanitize_info before writing; 'clean_infojson' (default True)
+ # is forwarded as its second argument
+ self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
+ try:
+ write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
+ return True
+ except OSError:
+ self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
+ return None
def _write_description(self, label, ie_result, descfn):
''' Write description and returns True = written, False = skip, None = error '''
else:
try:
self.to_screen(f'[info] Writing {label} description to: {descfn}')
- with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(ie_result['description'])
- except (OSError, IOError):
+ except OSError:
self.report_error(f'Cannot write {label} description file {descfn}')
return None
return True
sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
- if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
+ existing_sub = self.existing_file((sub_filename_final, sub_filename))
+ if existing_sub:
self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
- sub_info['filepath'] = sub_filename
- ret.append((sub_filename, sub_filename_final))
+ sub_info['filepath'] = existing_sub
+ ret.append((existing_sub, sub_filename_final))
continue
self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
try:
# Use newline='' to prevent conversion of newline characters
# See https://github.com/ytdl-org/youtube-dl/issues/10268
- with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
+ with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
sub_info['filepath'] = sub_filename
ret.append((sub_filename, sub_filename_final))
continue
- except (OSError, IOError):
+ except OSError:
self.report_error(f'Cannot write video subtitles file {sub_filename}')
return None
self.dl(sub_filename, sub_copy, subtitle=True)
sub_info['filepath'] = sub_filename
ret.append((sub_filename, sub_filename_final))
- except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
- self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
- continue
+ except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
+ msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
+ if self.params.get('ignoreerrors') is not True: # False or 'only_download'
+ if not self.params.get('ignoreerrors'):
+ self.report_error(msg)
+ raise DownloadError(msg)
+ self.report_warning(msg)
return ret
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
self.write_debug(f'Skipping writing {label} thumbnail')
return ret
- for t in thumbnails[::-1]:
+ for idx, t in list(enumerate(thumbnails))[::-1]:
thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
- thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
+ thumb_display_id = f'{label} thumbnail {t["id"]}'
thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
- if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
- ret.append((thumb_filename, thumb_filename_final))
- t['filepath'] = thumb_filename
- self.to_screen(f'[info] {thumb_display_id.title()} is already present')
+ existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
+ if existing_thumb:
+ self.to_screen('[info] %s is already present' % (
+ thumb_display_id if multiple else f'{label} thumbnail').capitalize())
+ t['filepath'] = existing_thumb
+ ret.append((existing_thumb, thumb_filename_final))
else:
self.to_screen(f'[info] Downloading {thumb_display_id} ...')
try:
- uf = self.urlopen(t['url'])
+ uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
ret.append((thumb_filename, thumb_filename_final))
t['filepath'] = thumb_filename
except network_exceptions as err:
+ thumbnails.pop(idx)
self.report_warning(f'Unable to download {thumb_display_id}: {err}')
if ret and not write_all:
break