#!/usr/bin/env python3
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import random
import unicodedata

from string import ascii_letters

from .compat import (
    compat_basestring,
    compat_get_terminal_size,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_pycrypto_AES,
    compat_shlex_quote,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
    windows_enable_vt_mode,
)
from .cookies import load_cookies
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    determine_ext,
    determine_protocol,
    DOT_DESKTOP_LINK_TEMPLATE,
    DOT_URL_LINK_TEMPLATE,
    DOT_WEBLOC_LINK_TEMPLATE,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    EntryNotInPlaylist,
    error_to_compat_str,
    ExistingVideoReached,
    expand_path,
    ExtractorError,
    float_or_none,
    format_bytes,
    format_field,
    formatSeconds,
    GeoRestrictedError,
    HEADRequest,
    int_or_none,
    iri_to_uri,
    ISO3166Utils,
    LazyList,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    MaxDownloadsReached,
    network_exceptions,
    orderedSet,
    OUTTMPL_TYPES,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    process_communicate_or_kill,
    register_socks_protocols,
    RejectedVideoReached,
    render_table,
    replace_extension,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    TERMINAL_SEQUENCES,
    ThrottledDownload,
    to_high_limit_path,
    traverse_obj,
    try_get,
    UnavailableVideoError,
    url_basename,
    variadic,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
)
from .cache import Cache
from .extractor import (
    gen_extractor_classes,
    get_info_extractor,
    _LAZY_LOADER,
    _PLUGIN_CLASSES as plugin_extractors
)
from .extractor.openload import PhantomJSwrapper
from .downloader import (
    FFmpegFD,
    get_suitable_downloader,
    shorten_protocol_name
)
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    get_postprocessor,
    EmbedThumbnailPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    _PLUGIN_CLASSES as plugin_postprocessors
)
from .update import detect_variant
from .version import __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (that is the task of the
    InfoExtractors), it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".
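
    A minimal usage sketch (the URL is illustrative; download() is defined
    further down in this class):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'best'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])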

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A list of templates to force print.
    forceurl:          Force printing final URL. (Deprecated)
    forcetitle:        Force printing title. (Deprecated)
    forceid:           Force printing ID. (Deprecated)
    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
    forcedescription:  Force printing description. (Deprecated)
    forcefilename:     Force printing final filename. (Deprecated)
    forceduration:     Force printing duration. (Deprecated)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or
                       list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       How to sort the video formats. See "Sorting Formats"
                       for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none)
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False.
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Deprecated - Use subtitleslangs = ['all'].
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object; download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    cookiefile:        File name where cookies should be read from and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser and the profile
                       name/path from where cookies are loaded.
                       Eg: ('chrome', ) or ('vivaldi', 'default')
    nocheckcertificate: Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Can be one of
                               pre_process|before_dl|post_process|after_move.
                               Assumed to be 'post_process' if not given
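                       A minimal entry sketch that extracts mp3 audio after
                       download (assumes ffmpeg is available):
                           {'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}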
    post_hooks:        Deprecated - Register a custom postprocessor instead.
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
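                       A minimal hook sketch using only the fields listed above:

                           def hook(d):
                               if d['status'] == 'finished':
                                   print('Downloaded', d['filename'])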
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted. "merge_output_format" is
                       replaced by this extension when given
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging. (BROKEN)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
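                       A minimal filter sketch (the duration check is
                       illustrative; any info_dict field can be used):

                           def longer_than_a_minute(info_dict):
                               if (info_dict.get('duration') or 0) < 60:
                                   return 'Too short, skipping'
                               return None  # download the video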
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
                       or {'m3u8': 'ffmpeg'} instead.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
    noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg. (avconv support is deprecated)
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP.
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    ))

    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    params = None
    _ies = {}
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self._ies = {}
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        windows_enable_vt_mode()
        # FIXME: This will break if we ever print color to stdout
        self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._color_text("DO NOT", "red")} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('warnings', []):
            self.report_warning(msg)

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug('Loading archive file %r\n' % fn)
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s\n' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there is no instance, it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=False):
        """Print message to stdout"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not quiet or self.params.get('verbose'):
            self._write_string(
                '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
                self._err_file if quiet else self._screen_file)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode"""
        self.to_stdout(
            message, skip_eol, quiet=self.params.get('quiet', False))

    def _color_text(self, text, color):
        if self.params.get('no_color'):
            return text
        return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb)

    def write_debug(self, message, only_once=False):
        '''Log a debug message to the logger, or print it to stderr'''
        if not self.params.get('verbose', False):
            return
        message = '[debug] %s' % message
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False):
        has_drm = info.get('__has_drm')
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
        expected = self.params.get('ignore_no_formats_error')
        if forced or not expected:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
            outtmpl_dict = {'default': outtmpl_dict}
        # Remove spaces in the default template
        if self.params.get('restrictfilenames'):
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
        else:
            sanitize = lambda x: x
        outtmpl_dict.update({
            k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
            if outtmpl_dict.get(k) is None})
        for key, val in outtmpl_dict.items():
            if isinstance(val, bytes):
                self.report_warning(
                    'Parameter outtmpl is bytes, but should be a unicode string. '
                    'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
        return outtmpl_dict

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to work around encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            path = encodeFilename(path, True).decode(preferredencoding())
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly; that is not what we want, since we need to keep
        # '%%' intact for the template dict substitution step. Working around
        # with a boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
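
    # Illustrative use of the validator above: YoutubeDL.validate_outtmpl(tmpl)
    # returns None when `tmpl` is a usable output template, and the ValueError
    # raised during the test substitution otherwise.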

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        for key in ('__original_infodict', '__postprocessors'):
            info_dict.pop(key, None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                opts = outer_mobj.group('conversion') or ''
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict
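
    # A worked example (values are hypothetical) of the template machinery above:
    #     ydl.evaluate_outtmpl('%(title)s-%(id)s.%(ext)s',
    #                          {'title': 'a', 'id': 'b', 'ext': 'mp4'})
    # evaluates to 'a-b.mp4'; fields missing from info_dict fall back to the
    # 'outtmpl_na_placeholder' parameter ('NA' by default).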

    def _prepare_filename(self, info_dict, tmpl_type='default'):
        try:
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
            filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)

            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if filename and force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', warn=False):
        """Generate the output filename."""

        filename = self._prepare_filename(info_dict, dir_type or 'default')
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)

    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

    def __handle_extraction_exceptions(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload:
                self.to_stderr('\r')
                self.report_warning('The download speed is below throttle limit. Re-extracting data')
                return wrapper(self, *args, **kwargs)
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
                raise
            except Exception as e:
                if self.params.get('ignoreerrors'):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper

    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result

    def add_default_extra_info(self, ie_result, ie, url):
        if url is not None:
            self.add_extra_info(ie_result, {
                'webpage_url': url,
                'original_url': url,
                'webpage_url_basename': url_basename(url),
            })
        if ie is not None:
            self.add_extra_info(ie_result, {
                'extractor': ie.IE_NAME,
                'extractor_key': ie.ie_key(),
            })
1335
1336 def process_ie_result(self, ie_result, download=True, extra_info=None):
1337 """
1338 Take the result of the ie(may be modified) and resolve all unresolved
1339 references (URLs, playlist items).
1340
1341 It will also download the videos if 'download'.
1342 Returns the resolved ie_result.
1343 """
1344 if extra_info is None:
1345 extra_info = {}
1346 result_type = ie_result.get('_type', 'video')
1347
1348 if result_type in ('url', 'url_transparent'):
1349 ie_result['url'] = sanitize_url(ie_result['url'])
1350 if ie_result.get('original_url'):
1351 extra_info.setdefault('original_url', ie_result['original_url'])
1352
1353 extract_flat = self.params.get('extract_flat', False)
1354 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1355 or extract_flat is True):
1356 info_copy = ie_result.copy()
1357 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1358 if ie and not ie_result.get('id'):
1359 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1360 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1361 self.add_extra_info(info_copy, extra_info)
1362 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1363 if self.params.get('force_write_download_archive', False):
1364 self.record_download_archive(info_copy)
1365 return ie_result
1366
1367 if result_type == 'video':
1368 self.add_extra_info(ie_result, extra_info)
1369 ie_result = self.process_video_result(ie_result, download=download)
1370 additional_urls = (ie_result or {}).get('additional_urls')
1371 if additional_urls:
1372 # TODO: Improve MetadataParserPP to allow setting a list
1373 if isinstance(additional_urls, compat_str):
1374 additional_urls = [additional_urls]
1375 self.to_screen(
1376 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1377 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1378 ie_result['additional_entries'] = [
1379 self.extract_info(
1380 url, download, extra_info,
1381 force_generic_extractor=self.params.get('force_generic_extractor'))
1382 for url in additional_urls
1383 ]
1384 return ie_result
1385 elif result_type == 'url':
1386 # We have to add extra_info to the results because it may be
1387 # contained in a playlist
1388 return self.extract_info(
1389 ie_result['url'], download,
1390 ie_key=ie_result.get('ie_key'),
1391 extra_info=extra_info)
1392 elif result_type == 'url_transparent':
1393 # Use the information from the embedding page
1394 info = self.extract_info(
1395 ie_result['url'], ie_key=ie_result.get('ie_key'),
1396 extra_info=extra_info, download=False, process=False)
1397
1398 # extract_info may return None when ignoreerrors is enabled and
1399 # extraction failed with an error, don't crash and return early
1400 # in this case
1401 if not info:
1402 return info
1403
1404 force_properties = dict(
1405 (k, v) for k, v in ie_result.items() if v is not None)
1406 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1407 if f in force_properties:
1408 del force_properties[f]
1409 new_result = info.copy()
1410 new_result.update(force_properties)
1411
1412 # Extracted info may not be a video result (i.e.
1413 # info.get('_type', 'video') != video) but rather an url or
1414 # url_transparent. In such cases outer metadata (from ie_result)
1415 # should be propagated to inner one (info). For this to happen
1416 # _type of info should be overridden with url_transparent. This
1417 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1418 if new_result.get('_type') == 'url':
1419 new_result['_type'] = 'url_transparent'
1420
1421 return self.process_ie_result(
1422 new_result, download=download, extra_info=extra_info)
1423 elif result_type in ('playlist', 'multi_video'):
1424 # Protect from infinite recursion due to recursively nested playlists
1425 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1426 webpage_url = ie_result['webpage_url']
1427 if webpage_url in self._playlist_urls:
1428 self.to_screen(
1429 '[download] Skipping already downloaded playlist: %s'
1430 % (ie_result.get('title') or ie_result.get('id')))
1431 return
1432
1433 self._playlist_level += 1
1434 self._playlist_urls.add(webpage_url)
1435 self._sanitize_thumbnails(ie_result)
1436 try:
1437 return self.__process_playlist(ie_result, download)
1438 finally:
1439 self._playlist_level -= 1
1440 if not self._playlist_level:
1441 self._playlist_urls.clear()
1442 elif result_type == 'compat_list':
1443 self.report_warning(
1444 'Extractor %s returned a compat_list result. '
1445 'It needs to be updated.' % ie_result.get('extractor'))
1446
1447 def _fixup(r):
1448 self.add_extra_info(r, {
1449 'extractor': ie_result['extractor'],
1450 'webpage_url': ie_result['webpage_url'],
1451 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1452 'extractor_key': ie_result['extractor_key'],
1453 })
1454 return r
1455 ie_result['entries'] = [
1456 self.process_ie_result(_fixup(r), download, extra_info)
1457 for r in ie_result['entries']
1458 ]
1459 return ie_result
1460 else:
1461 raise Exception('Invalid result type: %s' % result_type)
1462
1463 def _ensure_dir_exists(self, path):
1464 return make_dir(path, self.report_error)
1465
1466 def __process_playlist(self, ie_result, download):
1467 # We process each entry in the playlist
1468 playlist = ie_result.get('title') or ie_result.get('id')
1469 self.to_screen('[download] Downloading playlist: %s' % playlist)
1470
1471 if 'entries' not in ie_result:
1472 raise EntryNotInPlaylist()
1473 incomplete_entries = bool(ie_result.get('requested_entries'))
1474 if incomplete_entries:
1475 def fill_missing_entries(entries, indexes):
1476 ret = [None] * max(indexes)
1477 for i, entry in zip(indexes, entries):
1478 ret[i - 1] = entry
1479 return ret
1480 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1481
1482 playlist_results = []
1483
1484 playliststart = self.params.get('playliststart', 1)
1485 playlistend = self.params.get('playlistend')
1486 # For backwards compatibility, interpret -1 as whole list
1487 if playlistend == -1:
1488 playlistend = None
1489
1490 playlistitems_str = self.params.get('playlist_items')
1491 playlistitems = None
1492 if playlistitems_str is not None:
1493 def iter_playlistitems(format):
1494 for string_segment in format.split(','):
1495 if '-' in string_segment:
1496 start, end = string_segment.split('-')
1497 for item in range(int(start), int(end) + 1):
1498 yield int(item)
1499 else:
1500 yield int(string_segment)
1501 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
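# e.g. '1-3,7,10-13' yields [1, 2, 3, 7, 10, 11, 12, 13]; orderedSet also
# drops duplicate indices while preserving their order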
1502
1503 ie_entries = ie_result['entries']
1504 msg = (
1505 'Downloading %d videos' if not isinstance(ie_entries, list)
1506 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1507
1508 if isinstance(ie_entries, list):
1509 def get_entry(i):
1510 return ie_entries[i - 1]
1511 else:
1512 if not isinstance(ie_entries, PagedList):
1513 ie_entries = LazyList(ie_entries)
1514
1515 def get_entry(i):
1516 return YoutubeDL.__handle_extraction_exceptions(
1517 lambda self, i: ie_entries[i - 1]
1518 )(self, i)
1519
1520 entries = []
1521 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1522 for i in items:
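# Playlist indices are 1-based; an explicit 0 in --playlist-items is skipped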
1523 if i == 0:
1524 continue
1525 if playlistitems is None and playlistend is not None and playlistend < i:
1526 break
1527 entry = None
1528 try:
1529 entry = get_entry(i)
1530 if entry is None:
1531 raise EntryNotInPlaylist()
1532 except (IndexError, EntryNotInPlaylist):
1533 if incomplete_entries:
1534 raise EntryNotInPlaylist()
1535 elif not playlistitems:
1536 break
1537 entries.append(entry)
1538 try:
1539 if entry is not None:
1540 self._match_entry(entry, incomplete=True, silent=True)
1541 except (ExistingVideoReached, RejectedVideoReached):
1542 break
1543 ie_result['entries'] = entries
1544
1545 # Save playlist_index before re-ordering
1546 entries = [
1547 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1548 for i, entry in enumerate(entries, 1)
1549 if entry is not None]
1550 n_entries = len(entries)
1551
1552 if not playlistitems and (playliststart or playlistend):
1553 playlistitems = list(range(playliststart, playliststart + n_entries))
1554 ie_result['requested_entries'] = playlistitems
1555
1556 if self.params.get('allow_playlist_files', True):
1557 ie_copy = {
1558 'playlist': playlist,
1559 'playlist_id': ie_result.get('id'),
1560 'playlist_title': ie_result.get('title'),
1561 'playlist_uploader': ie_result.get('uploader'),
1562 'playlist_uploader_id': ie_result.get('uploader_id'),
1563 'playlist_index': 0,
1564 }
1565 ie_copy.update(dict(ie_result))
1566
1567 if self._write_info_json('playlist', ie_result,
1568 self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1569 return
1570 if self._write_description('playlist', ie_result,
1571 self.prepare_filename(ie_copy, 'pl_description')) is None:
1572 return
1573 # TODO: This should be passed to ThumbnailsConvertor if necessary
1574 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1575
1576 if self.params.get('playlistreverse', False):
1577 entries = entries[::-1]
1578 if self.params.get('playlistrandom', False):
1579 random.shuffle(entries)
1580
1581 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1582
1583 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1584 failures = 0
1585 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1586 for i, entry_tuple in enumerate(entries, 1):
1587 playlist_index, entry = entry_tuple
1588 if 'playlist-index' in self.params.get('compat_opts', []):
1589 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1590 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1591 # This __x_forwarded_for_ip thing is a bit ugly but requires
1592 # minimal changes
1593 if x_forwarded_for:
1594 entry['__x_forwarded_for_ip'] = x_forwarded_for
1595 extra = {
1596 'n_entries': n_entries,
1597 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1598 'playlist_index': playlist_index,
1599 'playlist_autonumber': i,
1600 'playlist': playlist,
1601 'playlist_id': ie_result.get('id'),
1602 'playlist_title': ie_result.get('title'),
1603 'playlist_uploader': ie_result.get('uploader'),
1604 'playlist_uploader_id': ie_result.get('uploader_id'),
1605 'extractor': ie_result['extractor'],
1606 'webpage_url': ie_result['webpage_url'],
1607 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1608 'extractor_key': ie_result['extractor_key'],
1609 }
1610
1611 if self._match_entry(entry, incomplete=True) is not None:
1612 continue
1613
1614 entry_result = self.__process_iterable_entry(entry, download, extra)
1615 if not entry_result:
1616 failures += 1
1617 if failures >= max_failures:
1618 self.report_error(
1619 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1620 break
1621 # TODO: skip failed (empty) entries?
1622 playlist_results.append(entry_result)
1623 ie_result['entries'] = playlist_results
1624 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1625 return ie_result
1626
1627 @__handle_extraction_exceptions
1628 def __process_iterable_entry(self, entry, download, extra_info):
1629 return self.process_ie_result(
1630 entry, download=download, extra_info=extra_info)
1631
1632 def _build_format_filter(self, filter_spec):
1633 " Returns a function to filter the formats according to the filter_spec "
1634
1635 OPERATORS = {
1636 '<': operator.lt,
1637 '<=': operator.le,
1638 '>': operator.gt,
1639 '>=': operator.ge,
1640 '=': operator.eq,
1641 '!=': operator.ne,
1642 }
1643 operator_rex = re.compile(r'''(?x)\s*
1644 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1645 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1646 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1647 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
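# e.g. 'height<=720', 'filesize>100M' or 'fps!=30?'; a trailing '?' makes the
# filter also match formats for which the field is unknown (none_inclusive)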
1648 m = operator_rex.fullmatch(filter_spec)
1649 if m:
1650 try:
1651 comparison_value = int(m.group('value'))
1652 except ValueError:
1653 comparison_value = parse_filesize(m.group('value'))
1654 if comparison_value is None:
1655 comparison_value = parse_filesize(m.group('value') + 'B')
1656 if comparison_value is None:
1657 raise ValueError(
1658 'Invalid value %r in format specification %r' % (
1659 m.group('value'), filter_spec))
1660 op = OPERATORS[m.group('op')]
1661
1662 if not m:
1663 STR_OPERATORS = {
1664 '=': operator.eq,
1665 '^=': lambda attr, value: attr.startswith(value),
1666 '$=': lambda attr, value: attr.endswith(value),
1667 '*=': lambda attr, value: value in attr,
1668 }
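# String comparisons look like e.g. 'ext=mp4', 'format_id^=http' (starts with),
# 'language$=en' (ends with) or 'format_note*=premium' (contains); prefixing
# the operator with '!' negates the comparison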
1669 str_operator_rex = re.compile(r'''(?x)\s*
1670 (?P<key>[a-zA-Z0-9._-]+)\s*
1671 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1672 (?P<value>[a-zA-Z0-9._-]+)\s*
1673 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1674 m = str_operator_rex.fullmatch(filter_spec)
1675 if m:
1676 comparison_value = m.group('value')
1677 str_op = STR_OPERATORS[m.group('op')]
1678 if m.group('negation'):
1679 op = lambda attr, value: not str_op(attr, value)
1680 else:
1681 op = str_op
1682
1683 if not m:
1684 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1685
1686 def _filter(f):
1687 actual_value = f.get(m.group('key'))
1688 if actual_value is None:
1689 return m.group('none_inclusive')
1690 return op(actual_value, comparison_value)
1691 return _filter
1692
1693 def _default_format_spec(self, info_dict, download=True):
1694
1695 def can_merge():
1696 merger = FFmpegMergerPP(self)
1697 return merger.available and merger.can_merge()
1698
1699 prefer_best = (
1700 not self.params.get('simulate')
1701 and download
1702 and (
1703 not can_merge()
1704 or info_dict.get('is_live', False)
1705 or self.outtmpl_dict['default'] == '-'))
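# Pre-merged 'best' is preferred whenever merging is impossible or pointless:
# no working ffmpeg, a live stream, or downloading to stdout ('-')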
1706 compat = (
1707 prefer_best
1708 or self.params.get('allow_multiple_audio_streams', False)
1709 or 'format-spec' in self.params.get('compat_opts', []))
1710
1711 return (
1712 'best/bestvideo+bestaudio' if prefer_best
1713 else 'bestvideo*+bestaudio/best' if not compat
1714 else 'bestvideo+bestaudio/best')
1715
1716 def build_format_selector(self, format_spec):
1717 def syntax_error(note, start):
1718 message = (
1719 'Invalid format specification: '
1720 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1721 return SyntaxError(message)
1722
1723 PICKFIRST = 'PICKFIRST'
1724 MERGE = 'MERGE'
1725 SINGLE = 'SINGLE'
1726 GROUP = 'GROUP'
1727 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
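# A format spec is parsed into a tree of FormatSelectors; e.g.
# 'bestvideo+bestaudio/best' becomes a PICKFIRST node whose first choice is a
# MERGE of (bestvideo, bestaudio) and whose fallback is the single selector 'best'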
1728
1729 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1730 'video': self.params.get('allow_multiple_video_streams', False)}
1731
1732 check_formats = self.params.get('check_formats')
1733
1734 def _parse_filter(tokens):
1735 filter_parts = []
1736 for type, string, start, _, _ in tokens:
1737 if type == tokenize.OP and string == ']':
1738 return ''.join(filter_parts)
1739 else:
1740 filter_parts.append(string)
1741
1742 def _remove_unused_ops(tokens):
1743 # Remove operators that we don't use and join them with the surrounding strings
1744 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1745 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1746 last_string, last_start, last_end, last_line = None, None, None, None
1747 for type, string, start, end, line in tokens:
1748 if type == tokenize.OP and string == '[':
1749 if last_string:
1750 yield tokenize.NAME, last_string, last_start, last_end, last_line
1751 last_string = None
1752 yield type, string, start, end, line
1753 # everything inside brackets will be handled by _parse_filter
1754 for type, string, start, end, line in tokens:
1755 yield type, string, start, end, line
1756 if type == tokenize.OP and string == ']':
1757 break
1758 elif type == tokenize.OP and string in ALLOWED_OPS:
1759 if last_string:
1760 yield tokenize.NAME, last_string, last_start, last_end, last_line
1761 last_string = None
1762 yield type, string, start, end, line
1763 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1764 if not last_string:
1765 last_string = string
1766 last_start = start
1767 last_end = end
1768 else:
1769 last_string += string
1770 if last_string:
1771 yield tokenize.NAME, last_string, last_start, last_end, last_line
1772
1773 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1774 selectors = []
1775 current_selector = None
1776 for type, string, start, _, _ in tokens:
1777 # ENCODING is only defined in python 3.x
1778 if type == getattr(tokenize, 'ENCODING', None):
1779 continue
1780 elif type in [tokenize.NAME, tokenize.NUMBER]:
1781 current_selector = FormatSelector(SINGLE, string, [])
1782 elif type == tokenize.OP:
1783 if string == ')':
1784 if not inside_group:
1785 # ')' will be handled by the parentheses group
1786 tokens.restore_last_token()
1787 break
1788 elif inside_merge and string in ['/', ',']:
1789 tokens.restore_last_token()
1790 break
1791 elif inside_choice and string == ',':
1792 tokens.restore_last_token()
1793 break
1794 elif string == ',':
1795 if not current_selector:
1796 raise syntax_error('"," must follow a format selector', start)
1797 selectors.append(current_selector)
1798 current_selector = None
1799 elif string == '/':
1800 if not current_selector:
1801 raise syntax_error('"/" must follow a format selector', start)
1802 first_choice = current_selector
1803 second_choice = _parse_format_selection(tokens, inside_choice=True)
1804 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1805 elif string == '[':
1806 if not current_selector:
1807 current_selector = FormatSelector(SINGLE, 'best', [])
1808 format_filter = _parse_filter(tokens)
1809 current_selector.filters.append(format_filter)
1810 elif string == '(':
1811 if current_selector:
1812 raise syntax_error('Unexpected "("', start)
1813 group = _parse_format_selection(tokens, inside_group=True)
1814 current_selector = FormatSelector(GROUP, group, [])
1815 elif string == '+':
1816 if not current_selector:
1817 raise syntax_error('Unexpected "+"', start)
1818 selector_1 = current_selector
1819 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1820 if not selector_2:
1821 raise syntax_error('Expected a selector', start)
1822 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1823 else:
1824 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1825 elif type == tokenize.ENDMARKER:
1826 break
1827 if current_selector:
1828 selectors.append(current_selector)
1829 return selectors
1830
1831 def _merge(formats_pair):
1832 format_1, format_2 = formats_pair
1833
1834 formats_info = []
1835 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1836 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1837
1838 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1839 get_no_more = {'video': False, 'audio': False}
1840 for (i, fmt_info) in enumerate(formats_info):
1841 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1842 formats_info.pop(i)
1843 continue
1844 for aud_vid in ['audio', 'video']:
1845 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1846 if get_no_more[aud_vid]:
1847 formats_info.pop(i)
1848 break
1849 get_no_more[aud_vid] = True
1850
1851 if len(formats_info) == 1:
1852 return formats_info[0]
1853
1854 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1855 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1856
1857 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1858 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1859
1860 output_ext = self.params.get('merge_output_format')
1861 if not output_ext:
1862 if the_only_video:
1863 output_ext = the_only_video['ext']
1864 elif the_only_audio and not video_fmts:
1865 output_ext = the_only_audio['ext']
1866 else:
1867 output_ext = 'mkv'
1868
1869 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1870
1871 new_dict = {
1872 'requested_formats': formats_info,
1873 'format': '+'.join(filtered('format')),
1874 'format_id': '+'.join(filtered('format_id')),
1875 'ext': output_ext,
1876 'protocol': '+'.join(map(determine_protocol, formats_info)),
1877 'language': '+'.join(orderedSet(filtered('language'))),
1878 'format_note': '+'.join(orderedSet(filtered('format_note'))),
1879 'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
1880 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
1881 }
1882
1883 if the_only_video:
1884 new_dict.update({
1885 'width': the_only_video.get('width'),
1886 'height': the_only_video.get('height'),
1887 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1888 'fps': the_only_video.get('fps'),
1889 'vcodec': the_only_video.get('vcodec'),
1890 'vbr': the_only_video.get('vbr'),
1891 'stretched_ratio': the_only_video.get('stretched_ratio'),
1892 })
1893
1894 if the_only_audio:
1895 new_dict.update({
1896 'acodec': the_only_audio.get('acodec'),
1897 'abr': the_only_audio.get('abr'),
1898 'asr': the_only_audio.get('asr'),
1899 })
1900
1901 return new_dict
1902
1903 def _check_formats(formats):
1904 if not check_formats:
1905 yield from formats
1906 return
1907 for f in formats:
1908 self.to_screen('[info] Testing format %s' % f['format_id'])
1909 temp_file = tempfile.NamedTemporaryFile(
1910 suffix='.tmp', delete=False,
1911 dir=self.get_output_path('temp') or None)
1912 temp_file.close()
1913 try:
1914 success, _ = self.dl(temp_file.name, f, test=True)
1915 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1916 success = False
1917 finally:
1918 if os.path.exists(temp_file.name):
1919 try:
1920 os.remove(temp_file.name)
1921 except OSError:
1922 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1923 if success:
1924 yield f
1925 else:
1926 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1927
1928 def _build_selector_function(selector):
1929 if isinstance(selector, list): # ,
1930 fs = [_build_selector_function(s) for s in selector]
1931
1932 def selector_function(ctx):
1933 for f in fs:
1934 yield from f(ctx)
1935 return selector_function
1936
1937 elif selector.type == GROUP: # ()
1938 selector_function = _build_selector_function(selector.selector)
1939
1940 elif selector.type == PICKFIRST: # /
1941 fs = [_build_selector_function(s) for s in selector.selector]
1942
1943 def selector_function(ctx):
1944 for f in fs:
1945 picked_formats = list(f(ctx))
1946 if picked_formats:
1947 return picked_formats
1948 return []
1949
1950 elif selector.type == MERGE: # +
1951 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1952
1953 def selector_function(ctx):
1954 for pair in itertools.product(
1955 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1956 yield _merge(pair)
1957
1958 elif selector.type == SINGLE: # atom
1959 format_spec = selector.selector or 'best'
1960
1961 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1962 if format_spec == 'all':
1963 def selector_function(ctx):
1964 yield from _check_formats(ctx['formats'])
1965 elif format_spec == 'mergeall':
1966 def selector_function(ctx):
1967 formats = list(_check_formats(ctx['formats']))
1968 if not formats:
1969 return
1970 merged_format = formats[-1]
1971 for f in formats[-2::-1]:
1972 merged_format = _merge((merged_format, f))
1973 yield merged_format
1974
1975 else:
1976 format_fallback, format_reverse, format_idx = False, True, 1
1977 mobj = re.match(
1978 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1979 format_spec)
1980 if mobj is not None:
1981 format_idx = int_or_none(mobj.group('n'), default=1)
1982 format_reverse = mobj.group('bw')[0] == 'b'
1983 format_type = (mobj.group('type') or [None])[0]
1984 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1985 format_modified = mobj.group('mod') is not None
1986
1987 format_fallback = not format_type and not format_modified # for b, w
1988 _filter_f = (
1989 (lambda f: f.get('%scodec' % format_type) != 'none')
1990 if format_type and format_modified # bv*, ba*, wv*, wa*
1991 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1992 if format_type # bv, ba, wv, wa
1993 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1994 if not format_modified # b, w
1995 else lambda f: True) # b*, w*
1996 filter_f = lambda f: _filter_f(f) and (
1997 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
1998 else:
1999 if format_spec in self._format_selection_exts['audio']:
2000 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2001 elif format_spec in self._format_selection_exts['video']:
2002 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2003 elif format_spec in self._format_selection_exts['storyboards']:
2004 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2005 else:
2006 filter_f = lambda f: f.get('format_id') == format_spec # id
2007
2008 def selector_function(ctx):
2009 formats = list(ctx['formats'])
2010 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2011 if format_fallback and ctx['incomplete_formats'] and not matches:
2012 # for extractors with incomplete formats (audio only (soundcloud)
2013 # or video only (imgur)) best/worst will fall back to
2014 # best/worst {video,audio}-only format
2015 matches = formats
2016 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2017 try:
2018 yield matches[format_idx - 1]
2019 except IndexError:
2020 return
2021
2022 filters = [self._build_format_filter(f) for f in selector.filters]
2023
2024 def final_selector(ctx):
2025 ctx_copy = copy.deepcopy(ctx)
2026 for _filter in filters:
2027 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2028 return selector_function(ctx_copy)
2029 return final_selector
2030
2031 stream = io.BytesIO(format_spec.encode('utf-8'))
2032 try:
2033 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2034 except tokenize.TokenError:
2035 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2036
2037 class TokenIterator(object):
2038 def __init__(self, tokens):
2039 self.tokens = tokens
2040 self.counter = 0
2041
2042 def __iter__(self):
2043 return self
2044
2045 def __next__(self):
2046 if self.counter >= len(self.tokens):
2047 raise StopIteration()
2048 value = self.tokens[self.counter]
2049 self.counter += 1
2050 return value
2051
2052 next = __next__
2053
2054 def restore_last_token(self):
2055 self.counter -= 1
2056
2057 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2058 return _build_selector_function(parsed_selector)
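# A rough usage sketch, assuming a YoutubeDL instance `ydl` and a list of
# format dicts `formats` taken from an info_dict:
#     selector = ydl.build_format_selector('bestvideo[height<=720]+bestaudio/best')
#     chosen = list(selector({'formats': formats, 'incomplete_formats': False}))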
2059
2060 def _calc_headers(self, info_dict):
2061 res = std_headers.copy()
2062
2063 add_headers = info_dict.get('http_headers')
2064 if add_headers:
2065 res.update(add_headers)
2066
2067 cookies = self._calc_cookies(info_dict)
2068 if cookies:
2069 res['Cookie'] = cookies
2070
2071 if 'X-Forwarded-For' not in res:
2072 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2073 if x_forwarded_for_ip:
2074 res['X-Forwarded-For'] = x_forwarded_for_ip
2075
2076 return res
2077
2078 def _calc_cookies(self, info_dict):
2079 pr = sanitized_Request(info_dict['url'])
2080 self.cookiejar.add_cookie_header(pr)
2081 return pr.get_header('Cookie')
2082
2083 def _sanitize_thumbnails(self, info_dict):
2084 thumbnails = info_dict.get('thumbnails')
2085 if thumbnails is None:
2086 thumbnail = info_dict.get('thumbnail')
2087 if thumbnail:
2088 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2089 if thumbnails:
2090 thumbnails.sort(key=lambda t: (
2091 t.get('preference') if t.get('preference') is not None else -1,
2092 t.get('width') if t.get('width') is not None else -1,
2093 t.get('height') if t.get('height') is not None else -1,
2094 t.get('id') if t.get('id') is not None else '',
2095 t.get('url')))
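# This key sorts worst-to-best (missing values count as -1 / ''), so code
# below can treat thumbnails[-1] as the preferred thumbnail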
2096
2097 def thumbnail_tester():
2098 if self.params.get('check_formats'):
2099 test_all = True
2100 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
2101 else:
2102 test_all = False
2103 to_screen = self.write_debug
2104
2105 def test_thumbnail(t):
2106 if not test_all and not t.get('_test_url'):
2107 return True
2108 to_screen('Testing thumbnail %s' % t['id'])
2109 try:
2110 self.urlopen(HEADRequest(t['url']))
2111 except network_exceptions as err:
2112 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2113 t['id'], t['url'], error_to_compat_str(err)))
2114 return False
2115 return True
2116
2117 return test_thumbnail
2118
2119 for i, t in enumerate(thumbnails):
2120 if t.get('id') is None:
2121 t['id'] = '%d' % i
2122 if t.get('width') and t.get('height'):
2123 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2124 t['url'] = sanitize_url(t['url'])
2125
2126 if self.params.get('check_formats') is not False:
2127 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2128 else:
2129 info_dict['thumbnails'] = thumbnails
2130
2131 def process_video_result(self, info_dict, download=True):
2132 assert info_dict.get('_type', 'video') == 'video'
2133
2134 if 'id' not in info_dict:
2135 raise ExtractorError('Missing "id" field in extractor result')
2136 if 'title' not in info_dict:
2137 raise ExtractorError('Missing "title" field in extractor result',
2138 video_id=info_dict['id'], ie=info_dict['extractor'])
2139
2140 def report_force_conversion(field, field_not, conversion):
2141 self.report_warning(
2142 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2143 % (field, field_not, conversion))
2144
2145 def sanitize_string_field(info, string_field):
2146 field = info.get(string_field)
2147 if field is None or isinstance(field, compat_str):
2148 return
2149 report_force_conversion(string_field, 'a string', 'string')
2150 info[string_field] = compat_str(field)
2151
2152 def sanitize_numeric_fields(info):
2153 for numeric_field in self._NUMERIC_FIELDS:
2154 field = info.get(numeric_field)
2155 if field is None or isinstance(field, compat_numeric_types):
2156 continue
2157 report_force_conversion(numeric_field, 'numeric', 'int')
2158 info[numeric_field] = int_or_none(field)
2159
2160 sanitize_string_field(info_dict, 'id')
2161 sanitize_numeric_fields(info_dict)
2162
2163 if 'playlist' not in info_dict:
2164 # It isn't part of a playlist
2165 info_dict['playlist'] = None
2166 info_dict['playlist_index'] = None
2167
2168 self._sanitize_thumbnails(info_dict)
2169
2170 thumbnail = info_dict.get('thumbnail')
2171 thumbnails = info_dict.get('thumbnails')
2172 if thumbnail:
2173 info_dict['thumbnail'] = sanitize_url(thumbnail)
2174 elif thumbnails:
2175 info_dict['thumbnail'] = thumbnails[-1]['url']
2176
2177 if info_dict.get('display_id') is None and 'id' in info_dict:
2178 info_dict['display_id'] = info_dict['id']
2179
2180 if info_dict.get('duration') is not None:
2181 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2182
2183 for ts_key, date_key in (
2184 ('timestamp', 'upload_date'),
2185 ('release_timestamp', 'release_date'),
2186 ):
2187 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2188 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2189 # see http://bugs.python.org/issue1646728)
2190 try:
2191 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2192 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2193 except (ValueError, OverflowError, OSError):
2194 pass
2195
2196 live_keys = ('is_live', 'was_live')
2197 live_status = info_dict.get('live_status')
2198 if live_status is None:
2199 for key in live_keys:
2200 if info_dict.get(key) is False:
2201 continue
2202 if info_dict.get(key):
2203 live_status = key
2204 break
2205 if all(info_dict.get(key) is False for key in live_keys):
2206 live_status = 'not_live'
2207 if live_status:
2208 info_dict['live_status'] = live_status
2209 for key in live_keys:
2210 if info_dict.get(key) is None:
2211 info_dict[key] = (live_status == key)
2212
2213 # Auto-generate title fields corresponding to the *_number fields when missing
2214 # in order to always have clean titles. This is very common for TV series.
2215 for field in ('chapter', 'season', 'episode'):
2216 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2217 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2218
2219 for cc_kind in ('subtitles', 'automatic_captions'):
2220 cc = info_dict.get(cc_kind)
2221 if cc:
2222 for _, subtitle in cc.items():
2223 for subtitle_format in subtitle:
2224 if subtitle_format.get('url'):
2225 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2226 if subtitle_format.get('ext') is None:
2227 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2228
2229 automatic_captions = info_dict.get('automatic_captions')
2230 subtitles = info_dict.get('subtitles')
2231
2232 info_dict['requested_subtitles'] = self.process_subtitles(
2233 info_dict['id'], subtitles, automatic_captions)
2234
2235 # We now pick which formats have to be downloaded
2236 if info_dict.get('formats') is None:
2237 # There's only one format available
2238 formats = [info_dict]
2239 else:
2240 formats = info_dict['formats']
2241
2242 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2243 if not self.params.get('allow_unplayable_formats'):
2244 formats = [f for f in formats if not f.get('has_drm')]
2245
2246 if not formats:
2247 self.raise_no_formats(info_dict)
2248
2249 def is_wellformed(f):
2250 url = f.get('url')
2251 if not url:
2252 self.report_warning(
2253 '"url" field is missing or empty - skipping format, '
2254 'there is an error in the extractor')
2255 return False
2256 if isinstance(url, bytes):
2257 sanitize_string_field(f, 'url')
2258 return True
2259
2260 # Filter out malformed formats for better extraction robustness
2261 formats = list(filter(is_wellformed, formats))
2262
2263 formats_dict = {}
2264
2265 # We check that all the formats have the format and format_id fields
2266 for i, format in enumerate(formats):
2267 sanitize_string_field(format, 'format_id')
2268 sanitize_numeric_fields(format)
2269 format['url'] = sanitize_url(format['url'])
2270 if not format.get('format_id'):
2271 format['format_id'] = compat_str(i)
2272 else:
2273 # Sanitize format_id from characters used in format selector expression
2274 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2275 format_id = format['format_id']
2276 if format_id not in formats_dict:
2277 formats_dict[format_id] = []
2278 formats_dict[format_id].append(format)
2279
2280 # Make sure all formats have unique format_id
2281 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2282 for format_id, ambiguous_formats in formats_dict.items():
2283 ambiguous_id = len(ambiguous_formats) > 1
2284 for i, format in enumerate(ambiguous_formats):
2285 if ambiguous_id:
2286 format['format_id'] = '%s-%d' % (format_id, i)
2287 if format.get('ext') is None:
2288 format['ext'] = determine_ext(format['url']).lower()
2289 # Ensure there is no conflict between id and ext in format selection
2290 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2291 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2292 format['format_id'] = 'f%s' % format['format_id']
2293
2294 for i, format in enumerate(formats):
2295 if format.get('format') is None:
2296 format['format'] = '{id} - {res}{note}'.format(
2297 id=format['format_id'],
2298 res=self.format_resolution(format),
2299 note=format_field(format, 'format_note', ' (%s)'),
2300 )
2301 if format.get('protocol') is None:
2302 format['protocol'] = determine_protocol(format)
2303 if format.get('resolution') is None:
2304 format['resolution'] = self.format_resolution(format, default=None)
2305 # Add HTTP headers, so that external programs can use them from the
2306 # json output
2307 full_format_info = info_dict.copy()
2308 full_format_info.update(format)
2309 format['http_headers'] = self._calc_headers(full_format_info)
2310 # Remove private housekeeping stuff
2311 if '__x_forwarded_for_ip' in info_dict:
2312 del info_dict['__x_forwarded_for_ip']
2313
2314 # TODO Central sorting goes here
2315
2316 if not formats or formats[0] is not info_dict:
2317 # only set the 'formats' field if the original info_dict lists them;
2318 # otherwise we end up with a circular reference: the first (and only)
2319 # element of the 'formats' field in info_dict would be info_dict itself,
2320 # which can't be exported to json
2321 info_dict['formats'] = formats
2322
2323 info_dict, _ = self.pre_process(info_dict)
2324
2325 if self.params.get('list_thumbnails'):
2326 self.list_thumbnails(info_dict)
2327 if self.params.get('listformats'):
2328 if not info_dict.get('formats') and not info_dict.get('url'):
2329 self.to_screen('%s has no formats' % info_dict['id'])
2330 else:
2331 self.list_formats(info_dict)
2332 if self.params.get('listsubtitles'):
2333 if 'automatic_captions' in info_dict:
2334 self.list_subtitles(
2335 info_dict['id'], automatic_captions, 'automatic captions')
2336 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2337 list_only = self.params.get('simulate') is None and (
2338 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2339 if list_only:
2340 # Without this printing, -F --print-json will not work
2341 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2342 return
2343
2344 format_selector = self.format_selector
2345 if format_selector is None:
2346 req_format = self._default_format_spec(info_dict, download=download)
2347 self.write_debug('Default format spec: %s' % req_format)
2348 format_selector = self.build_format_selector(req_format)
2349
2350 # While in format selection we may need to have access to the original
2351 # format set in order to calculate some metrics or do some processing.
2352 # For now we need to be able to guess whether original formats provided
2353 # by extractor are incomplete or not (i.e. whether extractor provides only
2354 # video-only or audio-only formats) for proper formats selection for
2355 # extractors with such incomplete formats (see
2356 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2357 # Since formats may be filtered during format selection and may not match
2358 # the original formats the results may be incorrect. Thus original formats
2359 # or pre-calculated metrics should be passed to format selection routines
2360 # as well.
2361 # We will pass a context object containing all necessary additional data
2362 # instead of just formats.
2363 # This fixes incorrect format selection issue (see
2364 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2365 incomplete_formats = (
2366 # All formats are video-only or
2367 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2368 # all formats are audio-only
2369 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2370
2371 ctx = {
2372 'formats': formats,
2373 'incomplete_formats': incomplete_formats,
2374 }
2375
2376 formats_to_download = list(format_selector(ctx))
2377 if not formats_to_download:
2378 if not self.params.get('ignore_no_formats_error'):
2379 raise ExtractorError('Requested format is not available', expected=True,
2380 video_id=info_dict['id'], ie=info_dict['extractor'])
2381 else:
2382 self.report_warning('Requested format is not available')
2383 # Process what we can, even without any available formats.
2384 self.process_info(dict(info_dict))
2385 elif download:
2386 self.to_screen(
2387 '[info] %s: Downloading %d format(s): %s' % (
2388 info_dict['id'], len(formats_to_download),
2389 ", ".join([f['format_id'] for f in formats_to_download])))
2390 for fmt in formats_to_download:
2391 new_info = dict(info_dict)
2392 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2393 new_info['__original_infodict'] = info_dict
2394 new_info.update(fmt)
2395 self.process_info(new_info)
2396 # We update the info dict with the best quality format (backwards compatibility)
2397 if formats_to_download:
2398 info_dict.update(formats_to_download[-1])
2399 return info_dict
2400
2401 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2402 """Select the requested subtitles and their format"""
2403 available_subs = {}
2404 if normal_subtitles and self.params.get('writesubtitles'):
2405 available_subs.update(normal_subtitles)
2406 if automatic_captions and self.params.get('writeautomaticsub'):
2407 for lang, cap_info in automatic_captions.items():
2408 if lang not in available_subs:
2409 available_subs[lang] = cap_info
2410
2411 if ((not self.params.get('writesubtitles')
2412 and not self.params.get('writeautomaticsub'))
2413 or not available_subs):
2414 return None
2415
2416 all_sub_langs = available_subs.keys()
2417 if self.params.get('allsubtitles', False):
2418 requested_langs = all_sub_langs
2419 elif self.params.get('subtitleslangs', False):
2420 # A list is used so that the order of languages will be the same as
2421 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
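# e.g. ['all', '-live_chat'] requests every available language except
# live_chat, while ['en.*', 'ja'] keeps all English variants plus Japanese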
2422 requested_langs = []
2423 for lang_re in self.params.get('subtitleslangs'):
2424 if lang_re == 'all':
2425 requested_langs.extend(all_sub_langs)
2426 continue
2427 discard = lang_re[0] == '-'
2428 if discard:
2429 lang_re = lang_re[1:]
2430 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2431 if discard:
2432 for lang in current_langs:
2433 while lang in requested_langs:
2434 requested_langs.remove(lang)
2435 else:
2436 requested_langs.extend(current_langs)
2437 requested_langs = orderedSet(requested_langs)
2438 elif 'en' in available_subs:
2439 requested_langs = ['en']
2440 else:
2441 requested_langs = [list(all_sub_langs)[0]]
2442 if requested_langs:
2443 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2444
2445 formats_query = self.params.get('subtitlesformat', 'best')
2446 formats_preference = formats_query.split('/') if formats_query else []
2447 subs = {}
2448 for lang in requested_langs:
2449 formats = available_subs.get(lang)
2450 if formats is None:
2451 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2452 continue
2453 for ext in formats_preference:
2454 if ext == 'best':
2455 f = formats[-1]
2456 break
2457 matches = list(filter(lambda f: f['ext'] == ext, formats))
2458 if matches:
2459 f = matches[-1]
2460 break
2461 else:
2462 f = formats[-1]
2463 self.report_warning(
2464 'No subtitle format found matching "%s" for language %s, '
2465 'using %s' % (formats_query, lang, f['ext']))
2466 subs[lang] = f
2467 return subs
2468
2469 def __forced_printings(self, info_dict, filename, incomplete):
2470 def print_mandatory(field, actual_field=None):
2471 if actual_field is None:
2472 actual_field = field
2473 if (self.params.get('force%s' % field, False)
2474 and (not incomplete or info_dict.get(actual_field) is not None)):
2475 self.to_stdout(info_dict[actual_field])
2476
2477 def print_optional(field):
2478 if (self.params.get('force%s' % field, False)
2479 and info_dict.get(field) is not None):
2480 self.to_stdout(info_dict[field])
2481
2482 info_dict = info_dict.copy()
2483 if filename is not None:
2484 info_dict['filename'] = filename
2485 if info_dict.get('requested_formats') is not None:
2486 # For RTMP URLs, also include the playpath
2487 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2488 elif 'url' in info_dict:
2489 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2490
2491 if self.params.get('forceprint') or self.params.get('forcejson'):
2492 self.post_extract(info_dict)
2493 for tmpl in self.params.get('forceprint', []):
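# A bare field name like 'title' expands to '%(title)s', while 'title='
# expands to 'title = %(title)s'; anything else is evaluated as a full
# output template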
2494 mobj = re.match(r'\w+(=?)$', tmpl)
2495 if mobj and mobj.group(1):
2496 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2497 elif mobj:
2498 tmpl = '%({})s'.format(tmpl)
2499 self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2500
2501 print_mandatory('title')
2502 print_mandatory('id')
2503 print_mandatory('url', 'urls')
2504 print_optional('thumbnail')
2505 print_optional('description')
2506 print_optional('filename')
2507 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2508 self.to_stdout(formatSeconds(info_dict['duration']))
2509 print_mandatory('format')
2510
2511 if self.params.get('forcejson'):
2512 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2513
2514 def dl(self, name, info, subtitle=False, test=False):
2515 if not info.get('url'):
2516 self.raise_no_formats(info, True)
2517
2518 if test:
2519 verbose = self.params.get('verbose')
2520 params = {
2521 'test': True,
2522 'quiet': self.params.get('quiet') or not verbose,
2523 'verbose': verbose,
2524 'noprogress': not verbose,
2525 'nopart': True,
2526 'skip_unavailable_fragments': False,
2527 'keep_fragments': False,
2528 'overwrites': True,
2529 '_no_ytdl_file': True,
2530 }
2531 else:
2532 params = self.params
2533 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2534 if not test:
2535 for ph in self._progress_hooks:
2536 fd.add_progress_hook(ph)
2537 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2538 self.write_debug('Invoking downloader on "%s"' % urls)
2539
2540 new_info = copy.deepcopy(self._copy_infodict(info))
2541 if new_info.get('http_headers') is None:
2542 new_info['http_headers'] = self._calc_headers(new_info)
2543 return fd.download(name, new_info, subtitle)
2544
2545 def process_info(self, info_dict):
2546 """Process a single resolved IE result."""
2547
2548 assert info_dict.get('_type', 'video') == 'video'
2549
2550 max_downloads = self.params.get('max_downloads')
2551 if max_downloads is not None:
2552 if self._num_downloads >= int(max_downloads):
2553 raise MaxDownloadsReached()
2554
2555 # TODO: backward compatibility, to be removed
2556 info_dict['fulltitle'] = info_dict['title']
2557
2558 if 'format' not in info_dict and 'ext' in info_dict:
2559 info_dict['format'] = info_dict['ext']
2560
2561 if self._match_entry(info_dict) is not None:
2562 return
2563
2564 self.post_extract(info_dict)
2565 self._num_downloads += 1
2566
2567 # info_dict['_filename'] needs to be set for backward compatibility
2568 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2569 temp_filename = self.prepare_filename(info_dict, 'temp')
2570 files_to_move = {}
2571
2572 # Forced printings
2573 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2574
2575 if self.params.get('simulate'):
2576 if self.params.get('force_write_download_archive', False):
2577 self.record_download_archive(info_dict)
2578 # Do nothing else if in simulate mode
2579 return
2580
2581 if full_filename is None:
2582 return
2583 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2584 return
2585 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2586 return
2587
2588 if self._write_description('video', info_dict,
2589 self.prepare_filename(info_dict, 'description')) is None:
2590 return
2591
2592 sub_files = self._write_subtitles(info_dict, temp_filename)
2593 if sub_files is None:
2594 return
2595 files_to_move.update(dict(sub_files))
2596
2597 thumb_files = self._write_thumbnails(
2598 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2599 if thumb_files is None:
2600 return
2601 files_to_move.update(dict(thumb_files))
2602
2603 infofn = self.prepare_filename(info_dict, 'infojson')
2604 _infojson_written = self._write_info_json('video', info_dict, infofn)
2605 if _infojson_written:
2606 info_dict['__infojson_filename'] = infofn
2607 elif _infojson_written is None:
2608 return
2609
2610 # Note: Annotations are deprecated
2611 annofn = None
2612 if self.params.get('writeannotations', False):
2613 annofn = self.prepare_filename(info_dict, 'annotation')
2614 if annofn:
2615 if not self._ensure_dir_exists(encodeFilename(annofn)):
2616 return
2617 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2618 self.to_screen('[info] Video annotations are already present')
2619 elif not info_dict.get('annotations'):
2620 self.report_warning('There are no annotations to write.')
2621 else:
2622 try:
2623 self.to_screen('[info] Writing video annotations to: ' + annofn)
2624 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2625 annofile.write(info_dict['annotations'])
2626 except (KeyError, TypeError):
2627 self.report_warning('There are no annotations to write.')
2628 except (OSError, IOError):
2629 self.report_error('Cannot write annotations file: ' + annofn)
2630 return
2631
2632 # Write internet shortcut files
2633 url_link = webloc_link = desktop_link = False
2634 if self.params.get('writelink', False):
2635 if sys.platform == "darwin": # macOS.
2636 webloc_link = True
2637 elif sys.platform.startswith("linux"):
2638 desktop_link = True
2639 else: # if sys.platform in ['win32', 'cygwin']:
2640 url_link = True
2641 if self.params.get('writeurllink', False):
2642 url_link = True
2643 if self.params.get('writewebloclink', False):
2644 webloc_link = True
2645 if self.params.get('writedesktoplink', False):
2646 desktop_link = True
2647
2648 if url_link or webloc_link or desktop_link:
2649 if 'webpage_url' not in info_dict:
2650 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2651 return
2652 ascii_url = iri_to_uri(info_dict['webpage_url'])
2653
2654 def _write_link_file(extension, template, newline, embed_filename):
2655 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2656 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2657 self.to_screen('[info] Internet shortcut is already present')
2658 else:
2659 try:
2660 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2661 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2662 template_vars = {'url': ascii_url}
2663 if embed_filename:
2664 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2665 linkfile.write(template % template_vars)
2666 except (OSError, IOError):
2667 self.report_error('Cannot write internet shortcut ' + linkfn)
2668 return False
2669 return True
2670
2671 if url_link:
2672 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2673 return
2674 if webloc_link:
2675 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2676 return
2677 if desktop_link:
2678 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2679 return
2680
2681 try:
2682 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2683 except PostProcessingError as err:
2684 self.report_error('Preprocessing: %s' % str(err))
2685 return
2686
2687 must_record_download_archive = False
2688 if self.params.get('skip_download', False):
2689 info_dict['filepath'] = temp_filename
2690 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2691 info_dict['__files_to_move'] = files_to_move
2692 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2693 else:
2694 # Download
2695 info_dict.setdefault('__postprocessors', [])
2696 try:
2697
2698 def existing_file(*filepaths):
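# Look for both the given paths and their post-processed (final_ext)
# variants, so that e.g. an already converted file is not downloaded again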
2699 ext = info_dict.get('ext')
2700 final_ext = self.params.get('final_ext', ext)
2701 existing_files = []
2702 for file in orderedSet(filepaths):
2703 if final_ext != ext:
2704 converted = replace_extension(file, final_ext, ext)
2705 if os.path.exists(encodeFilename(converted)):
2706 existing_files.append(converted)
2707 if os.path.exists(encodeFilename(file)):
2708 existing_files.append(file)
2709
2710 if not existing_files or self.params.get('overwrites', False):
2711 for file in orderedSet(existing_files):
2712 self.report_file_delete(file)
2713 os.remove(encodeFilename(file))
2714 return None
2715
2716 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2717 return existing_files[0]
2718
2719 success = True
2720 if info_dict.get('requested_formats') is not None:
2721
2722 def compatible_formats(formats):
2723 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2724 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2725 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2726 if len(video_formats) > 1 or len(audio_formats) > 1:
2727 return False
2728
2729 # Check extension
2730 exts = set(format.get('ext') for format in formats)
2731 COMPATIBLE_EXTS = (
2732 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2733 set(('webm',)),
2734 )
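# e.g. an mp4 video stream merged with m4a audio is compatible, while
# mp4 video with webm audio is not (such a merge is remuxed to mkv below)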
2735 for ext_sets in COMPATIBLE_EXTS:
2736 if ext_sets.issuperset(exts):
2737 return True
2738 # TODO: Check acodec/vcodec
2739 return False
2740
2741 requested_formats = info_dict['requested_formats']
2742 old_ext = info_dict['ext']
2743 if self.params.get('merge_output_format') is None:
2744 if not compatible_formats(requested_formats):
2745 info_dict['ext'] = 'mkv'
2746 self.report_warning(
2747 'Requested formats are incompatible for merge and will be merged into mkv')
2748 if (info_dict['ext'] == 'webm'
2749 and info_dict.get('thumbnails')
2750 # check with type instead of pp_key, __name__, or isinstance
2751 # since we don't want any custom PPs to trigger this
2752 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2753 info_dict['ext'] = 'mkv'
2754 self.report_warning(
2755 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2756 new_ext = info_dict['ext']
2757
2758 def correct_ext(filename, ext=new_ext):
2759 if filename == '-':
2760 return filename
2761 filename_real_ext = os.path.splitext(filename)[1][1:]
2762 filename_wo_ext = (
2763 os.path.splitext(filename)[0]
2764 if filename_real_ext in (old_ext, new_ext)
2765 else filename)
2766 return '%s.%s' % (filename_wo_ext, ext)
2767
2768 # Ensure filename always has a correct extension for successful merge
2769 full_filename = correct_ext(full_filename)
2770 temp_filename = correct_ext(temp_filename)
2771 dl_filename = existing_file(full_filename, temp_filename)
2772 info_dict['__real_download'] = False
2773
2774 if dl_filename is not None:
2775 self.report_file_already_downloaded(dl_filename)
2776 elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2777 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2778 success, real_download = self.dl(temp_filename, info_dict)
2779 info_dict['__real_download'] = real_download
2780 else:
2781 downloaded = []
2782 merger = FFmpegMergerPP(self)
2783 if self.params.get('allow_unplayable_formats'):
2784 self.report_warning(
2785 'You have requested merging of multiple formats '
2786 'while also allowing unplayable formats to be downloaded. '
2787 'The formats won\'t be merged to prevent data corruption.')
2788 elif not merger.available:
2789 self.report_warning(
2790 'You have requested merging of multiple formats but ffmpeg is not installed. '
2791 'The formats won\'t be merged.')
2792
2793 if temp_filename == '-':
2794 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2795 else 'but the formats are incompatible for simultaneous download' if merger.available
2796 else 'but ffmpeg is not installed')
2797 self.report_warning(
2798 f'You have requested downloading multiple formats to stdout {reason}. '
2799 'The formats will be streamed one after the other')
2800 fname = temp_filename
2801 for f in requested_formats:
2802 new_info = dict(info_dict)
2803 del new_info['requested_formats']
2804 new_info.update(f)
2805 if temp_filename != '-':
2806 fname = prepend_extension(
2807 correct_ext(temp_filename, new_info['ext']),
2808 'f%s' % f['format_id'], new_info['ext'])
2809 if not self._ensure_dir_exists(fname):
2810 return
2811 f['filepath'] = fname
2812 downloaded.append(fname)
2813 partial_success, real_download = self.dl(fname, new_info)
2814 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2815 success = success and partial_success
2816 if merger.available and not self.params.get('allow_unplayable_formats'):
2817 info_dict['__postprocessors'].append(merger)
2818 info_dict['__files_to_merge'] = downloaded
2819 # Even if there were no downloads, it is being merged only now
2820 info_dict['__real_download'] = True
2821 else:
2822 for file in downloaded:
2823 files_to_move[file] = None
2824 else:
2825 # Just a single file
2826 dl_filename = existing_file(full_filename, temp_filename)
2827 if dl_filename is None or dl_filename == temp_filename:
2828 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2829 # So we should try to resume the download
2830 success, real_download = self.dl(temp_filename, info_dict)
2831 info_dict['__real_download'] = real_download
2832 else:
2833 self.report_file_already_downloaded(dl_filename)
2834
2835 dl_filename = dl_filename or temp_filename
2836 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2837
2838 except network_exceptions as err:
2839 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2840 return
2841 except (OSError, IOError) as err:
2842 raise UnavailableVideoError(err)
2843 except (ContentTooShortError, ) as err:
2844 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2845 return
2846
2847 if success and full_filename != '-':
2848
2849 def fixup():
2850 do_fixup = True
2851 fixup_policy = self.params.get('fixup')
2852 vid = info_dict['id']
2853
2854 if fixup_policy in ('ignore', 'never'):
2855 return
2856 elif fixup_policy == 'warn':
2857 do_fixup = False
2858 elif fixup_policy != 'force':
2859 assert fixup_policy in ('detect_or_warn', None)
2860 if not info_dict.get('__real_download'):
2861 do_fixup = False
2862
2863 def ffmpeg_fixup(cndn, msg, cls):
2864 if not cndn:
2865 return
2866 if not do_fixup:
2867 self.report_warning(f'{vid}: {msg}')
2868 return
2869 pp = cls(self)
2870 if pp.available:
2871 info_dict['__postprocessors'].append(pp)
2872 else:
2873 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2874
2875 stretched_ratio = info_dict.get('stretched_ratio')
2876 ffmpeg_fixup(
2877 stretched_ratio not in (1, None),
2878 f'Non-uniform pixel ratio {stretched_ratio}',
2879 FFmpegFixupStretchedPP)
2880
2881 ffmpeg_fixup(
2882 (info_dict.get('requested_formats') is None
2883 and info_dict.get('container') == 'm4a_dash'
2884 and info_dict.get('ext') == 'm4a'),
2885 'writing DASH m4a. Only some players support this container',
2886 FFmpegFixupM4aPP)
2887
2888 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
2889 downloader = downloader.__name__ if downloader else None
2890 ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2891 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2892 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2893 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2894
2895 fixup()
2896 try:
2897 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2898 except PostProcessingError as err:
2899 self.report_error('Postprocessing: %s' % str(err))
2900 return
2901 try:
2902 for ph in self._post_hooks:
2903 ph(info_dict['filepath'])
2904 except Exception as err:
2905 self.report_error('post hooks: %s' % str(err))
2906 return
2907 must_record_download_archive = True
2908
2909 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2910 self.record_download_archive(info_dict)
2911 max_downloads = self.params.get('max_downloads')
2912 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2913 raise MaxDownloadsReached()
2914
2915 def download(self, url_list):
2916 """Download a given list of URLs."""
2917 outtmpl = self.outtmpl_dict['default']
2918 if (len(url_list) > 1
2919 and outtmpl != '-'
2920 and '%' not in outtmpl
2921 and self.params.get('max_downloads') != 1):
2922 raise SameFileError(outtmpl)
2923
2924 for url in url_list:
2925 try:
2926                 # This call also downloads the videos, since extract_info defaults to download=True
2927 res = self.extract_info(
2928 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2929 except UnavailableVideoError:
2930 self.report_error('unable to download video')
2931 except MaxDownloadsReached:
2932 self.to_screen('[info] Maximum number of downloads reached')
2933 raise
2934 except ExistingVideoReached:
2935 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
2936 raise
2937 except RejectedVideoReached:
2938                 self.to_screen('[info] Encountered a video that did not match the filter, stopping due to --break-on-reject')
2939 raise
2940 else:
2941 if self.params.get('dump_single_json', False):
2942 self.post_extract(res)
2943 self.to_stdout(json.dumps(self.sanitize_info(res)))
2944
2945 return self._download_retcode
2946
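    # Illustrative usage sketch (not part of the class; the URL and params are
    # placeholders):
    #
    #   from yt_dlp import YoutubeDL
    #   with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
    #       retcode = ydl.download(['https://example.com/some-video'])
    #
    # download() returns the accumulated retcode (0 if everything succeeded);
    # MaxDownloadsReached, ExistingVideoReached and RejectedVideoReached are
    # re-raised so callers can stop multi-URL runs early.
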
2947 def download_with_info_file(self, info_filename):
2948 with contextlib.closing(fileinput.FileInput(
2949 [info_filename], mode='r',
2950 openhook=fileinput.hook_encoded('utf-8'))) as f:
2951             # FileInput doesn't have a read method, so json.load can't be used directly
2952 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2953 try:
2954 self.process_ie_result(info, download=True)
2955 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2956 webpage_url = info.get('webpage_url')
2957 if webpage_url is not None:
2958                 self.report_warning('The info failed to download; retrying with "%s"' % webpage_url)
2959 return self.download([webpage_url])
2960 else:
2961 raise
2962 return self._download_retcode
2963
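    # A possible round trip through an info file (sketch; the .info.json
    # filename is hypothetical):
    #
    #   with YoutubeDL({'writeinfojson': True, 'skip_download': True}) as ydl:
    #       ydl.download(['https://example.com/some-video'])
    #   with YoutubeDL() as ydl:
    #       ydl.download_with_info_file('Some Video [abc123].info.json')
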
2964 @staticmethod
2965 def sanitize_info(info_dict, remove_private_keys=False):
2966 ''' Sanitize the infodict for converting to json '''
2967 if info_dict is None:
2968 return info_dict
2969 info_dict.setdefault('epoch', int(time.time()))
2970 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
2971         keep_keys = {'_type'}  # Always keep this to facilitate load-info-json
2972 if remove_private_keys:
2973 remove_keys |= {
2974 'requested_formats', 'requested_subtitles', 'requested_entries',
2975 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2976 }
2977 empty_values = (None, {}, [], set(), tuple())
2978 reject = lambda k, v: k not in keep_keys and (
2979 k.startswith('_') or k in remove_keys or v in empty_values)
2980 else:
2981 reject = lambda k, v: k in remove_keys
2982 filter_fn = lambda obj: (
2983 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2984 else obj if not isinstance(obj, dict)
2985 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2986 return filter_fn(info_dict)
2987
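    # E.g. to dump an extracted info dict as JSON without internal keys
    # (sketch; assumes `ydl` is a configured YoutubeDL instance and `url` is
    # any supported URL):
    #
    #   info = ydl.extract_info(url, download=False)
    #   print(json.dumps(YoutubeDL.sanitize_info(info, remove_private_keys=True)))
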
2988 @staticmethod
2989 def filter_requested_info(info_dict, actually_filter=True):
2990 ''' Alias of sanitize_info for backward compatibility '''
2991 return YoutubeDL.sanitize_info(info_dict, actually_filter)
2992
2993 def run_pp(self, pp, infodict):
2994 files_to_delete = []
2995 if '__files_to_move' not in infodict:
2996 infodict['__files_to_move'] = {}
2997 try:
2998 files_to_delete, infodict = pp.run(infodict)
2999 except PostProcessingError as e:
3000             # `ignoreerrors` must be exactly True (not 'only_download') for PP errors to be ignored
3001 if self.params.get('ignoreerrors') is True:
3002 self.report_error(e)
3003 return infodict
3004 raise
3005
3006 if not files_to_delete:
3007 return infodict
3008 if self.params.get('keepvideo', False):
3009 for f in files_to_delete:
3010 infodict['__files_to_move'].setdefault(f, '')
3011 else:
3012 for old_filename in set(files_to_delete):
3013 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3014 try:
3015 os.remove(encodeFilename(old_filename))
3016 except (IOError, OSError):
3017 self.report_warning('Unable to remove downloaded original file')
3018 if old_filename in infodict['__files_to_move']:
3019 del infodict['__files_to_move'][old_filename]
3020 return infodict
3021
3022 @staticmethod
3023 def post_extract(info_dict):
3024 def actual_post_extract(info_dict):
3025 if info_dict.get('_type') in ('playlist', 'multi_video'):
3026                 for video_dict in info_dict.get('entries') or []:
3027 actual_post_extract(video_dict or {})
3028 return
3029
3030 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3031 extra = post_extractor().items()
3032 info_dict.update(extra)
3033 info_dict.pop('__post_extractor', None)
3034
3035 original_infodict = info_dict.get('__original_infodict') or {}
3036 original_infodict.update(extra)
3037 original_infodict.pop('__post_extractor', None)
3038
3039 actual_post_extract(info_dict or {})
3040
3041 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3042 info = dict(ie_info)
3043 info['__files_to_move'] = files_to_move or {}
3044 for pp in self._pps[key]:
3045 info = self.run_pp(pp, info)
3046 return info, info.pop('__files_to_move', None)
3047
3048 def post_process(self, filename, ie_info, files_to_move=None):
3049 """Run all the postprocessors on the given file."""
3050 info = dict(ie_info)
3051 info['filepath'] = filename
3052 info['__files_to_move'] = files_to_move or {}
3053
3054 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3055 info = self.run_pp(pp, info)
3056 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3057 del info['__files_to_move']
3058 for pp in self._pps['after_move']:
3059 info = self.run_pp(pp, info)
3060 return info
3061
3062 def _make_archive_id(self, info_dict):
3063 video_id = info_dict.get('id')
3064 if not video_id:
3065 return
3066         # Lower-case the extractor key to be future-proof against any change in case
3067         # and for backwards compatibility with prior versions
3068 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3069 if extractor is None:
3070 url = str_or_none(info_dict.get('url'))
3071 if not url:
3072 return
3073 # Try to find matching extractor for the URL and take its ie_key
3074 for ie_key, ie in self._ies.items():
3075 if ie.suitable(url):
3076 extractor = ie_key
3077 break
3078 else:
3079 return
3080 return '%s %s' % (extractor.lower(), video_id)
3081
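    # The archive id is the lower-cased extractor key followed by the video id,
    # which is also the line format of the --download-archive file. A sketch
    # with a made-up video id:
    #
    #   ydl._make_archive_id({'id': 'abc123', 'extractor_key': 'Youtube'})
    #   # -> 'youtube abc123'
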
3082 def in_download_archive(self, info_dict):
3083 fn = self.params.get('download_archive')
3084 if fn is None:
3085 return False
3086
3087 vid_id = self._make_archive_id(info_dict)
3088 if not vid_id:
3089 return False # Incomplete video information
3090
3091 return vid_id in self.archive
3092
3093 def record_download_archive(self, info_dict):
3094 fn = self.params.get('download_archive')
3095 if fn is None:
3096 return
3097 vid_id = self._make_archive_id(info_dict)
3098 assert vid_id
3099 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3100 archive_file.write(vid_id + '\n')
3101 self.archive.add(vid_id)
3102
3103 @staticmethod
3104 def format_resolution(format, default='unknown'):
3105 is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
3106 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3107 return 'audio only'
3108 if format.get('resolution') is not None:
3109 return format['resolution']
3110 if format.get('width') and format.get('height'):
3111 res = '%dx%d' % (format['width'], format['height'])
3112 elif format.get('height'):
3113 res = '%sp' % format['height']
3114 elif format.get('width'):
3115 res = '%dx?' % format['width']
3116 elif is_images:
3117 return 'images'
3118 else:
3119 return default
3120 return f'{res} images' if is_images else res
3121
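    # What the branches above resolve to, for a few sketched format dicts:
    #
    #   {'width': 1920, 'height': 1080}            -> '1920x1080'
    #   {'height': 720}                            -> '720p'
    #   {'vcodec': 'none', 'acodec': 'mp4a.40.2'}  -> 'audio only'
    #   {'vcodec': 'none', 'acodec': 'none'}       -> 'images'
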
3122 def _format_note(self, fdict):
3123 res = ''
3124 if fdict.get('ext') in ['f4f', 'f4m']:
3125 res += '(unsupported) '
3126 if fdict.get('language'):
3127 if res:
3128 res += ' '
3129 res += '[%s] ' % fdict['language']
3130 if fdict.get('format_note') is not None:
3131 res += fdict['format_note'] + ' '
3132 if fdict.get('tbr') is not None:
3133 res += '%4dk ' % fdict['tbr']
3134 if fdict.get('container') is not None:
3135 if res:
3136 res += ', '
3137 res += '%s container' % fdict['container']
3138 if (fdict.get('vcodec') is not None
3139 and fdict.get('vcodec') != 'none'):
3140 if res:
3141 res += ', '
3142 res += fdict['vcodec']
3143 if fdict.get('vbr') is not None:
3144 res += '@'
3145 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3146 res += 'video@'
3147 if fdict.get('vbr') is not None:
3148 res += '%4dk' % fdict['vbr']
3149 if fdict.get('fps') is not None:
3150 if res:
3151 res += ', '
3152 res += '%sfps' % fdict['fps']
3153 if fdict.get('acodec') is not None:
3154 if res:
3155 res += ', '
3156 if fdict['acodec'] == 'none':
3157 res += 'video only'
3158 else:
3159 res += '%-5s' % fdict['acodec']
3160 elif fdict.get('abr') is not None:
3161 if res:
3162 res += ', '
3163 res += 'audio'
3164 if fdict.get('abr') is not None:
3165 res += '@%3dk' % fdict['abr']
3166 if fdict.get('asr') is not None:
3167 res += ' (%5dHz)' % fdict['asr']
3168 if fdict.get('filesize') is not None:
3169 if res:
3170 res += ', '
3171 res += format_bytes(fdict['filesize'])
3172 elif fdict.get('filesize_approx') is not None:
3173 if res:
3174 res += ', '
3175 res += '~' + format_bytes(fdict['filesize_approx'])
3176 return res
3177
3178 def list_formats(self, info_dict):
3179 formats = info_dict.get('formats', [info_dict])
3180 new_format = (
3181 'list-formats' not in self.params.get('compat_opts', [])
3182 and self.params.get('listformats_table', True) is not False)
3183 if new_format:
3184 table = [
3185 [
3186 format_field(f, 'format_id'),
3187 format_field(f, 'ext'),
3188 self.format_resolution(f),
3189 format_field(f, 'fps', '%d'),
3190 '|',
3191 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3192 format_field(f, 'tbr', '%4dk'),
3193                     shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
3194 '|',
3195 format_field(f, 'vcodec', default='unknown').replace('none', ''),
3196 format_field(f, 'vbr', '%4dk'),
3197 format_field(f, 'acodec', default='unknown').replace('none', ''),
3198 format_field(f, 'abr', '%3dk'),
3199 format_field(f, 'asr', '%5dHz'),
3200 ', '.join(filter(None, (
3201 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3202 format_field(f, 'language', '[%s]'),
3203 format_field(f, 'format_note'),
3204 format_field(f, 'container', ignore=(None, f.get('ext'))),
3205 ))),
3206 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3207 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
3208 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
3209 else:
3210 table = [
3211 [
3212 format_field(f, 'format_id'),
3213 format_field(f, 'ext'),
3214 self.format_resolution(f),
3215 self._format_note(f)]
3216 for f in formats
3217 if f.get('preference') is None or f['preference'] >= -1000]
3218 header_line = ['format code', 'extension', 'resolution', 'note']
3219
3220 self.to_screen(
3221 '[info] Available formats for %s:' % info_dict['id'])
3222 self.to_stdout(render_table(
3223 header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3224
3225 def list_thumbnails(self, info_dict):
3226         thumbnails = list(info_dict.get('thumbnails') or [])
3227 if not thumbnails:
3228 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3229 return
3230
3231 self.to_screen(
3232 '[info] Thumbnails for %s:' % info_dict['id'])
3233 self.to_stdout(render_table(
3234 ['ID', 'width', 'height', 'URL'],
3235 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3236
3237 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3238 if not subtitles:
3239 self.to_screen('%s has no %s' % (video_id, name))
3240 return
3241 self.to_screen(
3242 'Available %s for %s:' % (name, video_id))
3243
3244 def _row(lang, formats):
3245 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3246 if len(set(names)) == 1:
3247 names = [] if names[0] == 'unknown' else names[:1]
3248 return [lang, ', '.join(names), ', '.join(exts)]
3249
3250 self.to_stdout(render_table(
3251 ['Language', 'Name', 'Formats'],
3252 [_row(lang, formats) for lang, formats in subtitles.items()],
3253 hideEmpty=True))
3254
3255 def urlopen(self, req):
3256 """ Start an HTTP download """
3257 if isinstance(req, compat_basestring):
3258 req = sanitized_Request(req)
3259 return self._opener.open(req, timeout=self._socket_timeout)
3260
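    # Both forms are accepted (sketch; the URL and header are placeholders):
    #
    #   ydl.urlopen('https://example.com')
    #   ydl.urlopen(sanitized_Request('https://example.com', headers={'X-Debug': '1'}))
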
3261 def print_debug_header(self):
3262 if not self.params.get('verbose'):
3263 return
3264 get_encoding = lambda stream: getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3265 encoding_str = (
3266 '[debug] Encodings: locale %s, fs %s, stdout %s, stderr %s, pref %s\n' % (
3267 locale.getpreferredencoding(),
3268 sys.getfilesystemencoding(),
3269 get_encoding(self._screen_file), get_encoding(self._err_file),
3270 self.get_encoding()))
3271
3272 logger = self.params.get('logger')
3273 if logger:
3274 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3275 write_debug(encoding_str)
3276 else:
3277 write_debug = lambda msg: self._write_string(f'[debug] {msg}')
3278 write_string(encoding_str, encoding=None)
3279
3280 source = detect_variant()
3281 write_debug('yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
3282 if _LAZY_LOADER:
3283 write_debug('Lazy loading extractors enabled\n')
3284 if plugin_extractors or plugin_postprocessors:
3285 write_debug('Plugins: %s\n' % [
3286 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3287 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3288 if self.params.get('compat_opts'):
3289 write_debug('Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3290 try:
3291 sp = subprocess.Popen(
3292 ['git', 'rev-parse', '--short', 'HEAD'],
3293 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3294 cwd=os.path.dirname(os.path.abspath(__file__)))
3295 out, err = process_communicate_or_kill(sp)
3296 out = out.decode().strip()
3297             if re.fullmatch('[0-9a-f]+', out):
3298 write_debug('Git HEAD: %s\n' % out)
3299         except Exception:
3300             pass  # the Python 2 `sys.exc_clear()` workaround is unnecessary on Python 3
3304
3305 def python_implementation():
3306 impl_name = platform.python_implementation()
3307 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3308 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3309 return impl_name
3310
3311 write_debug('Python version %s (%s %s) - %s\n' % (
3312 platform.python_version(),
3313 python_implementation(),
3314 platform.architecture()[0],
3315 platform_name()))
3316
3317 exe_versions = FFmpegPostProcessor.get_versions(self)
3318 exe_versions['rtmpdump'] = rtmpdump_version()
3319 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3320 exe_str = ', '.join(
3321 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3322 ) or 'none'
3323 write_debug('exe versions: %s\n' % exe_str)
3324
3325 from .downloader.websocket import has_websockets
3326 from .postprocessor.embedthumbnail import has_mutagen
3327 from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3328
3329 lib_str = ', '.join(sorted(filter(None, (
3330 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3331 has_websockets and 'websockets',
3332 has_mutagen and 'mutagen',
3333 SQLITE_AVAILABLE and 'sqlite',
3334 KEYRING_AVAILABLE and 'keyring',
3335 )))) or 'none'
3336 write_debug('Optional libraries: %s\n' % lib_str)
3337 write_debug('ANSI escape support: stdout = %s, stderr = %s\n' % (
3338 supports_terminal_sequences(self._screen_file),
3339 supports_terminal_sequences(self._err_file)))
3340
3341 proxy_map = {}
3342 for handler in self._opener.handlers:
3343 if hasattr(handler, 'proxies'):
3344 proxy_map.update(handler.proxies)
3345 write_debug('Proxy map: ' + compat_str(proxy_map) + '\n')
3346
3347 if self.params.get('call_home', False):
3348 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3349 write_debug('Public IP address: %s\n' % ipaddr)
3350 return
3358
3359 def _setup_opener(self):
3360 timeout_val = self.params.get('socket_timeout')
3361 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3362
3363 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3364 opts_cookiefile = self.params.get('cookiefile')
3365 opts_proxy = self.params.get('proxy')
3366
3367 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3368
3369 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3370 if opts_proxy is not None:
3371 if opts_proxy == '':
3372 proxies = {}
3373 else:
3374 proxies = {'http': opts_proxy, 'https': opts_proxy}
3375 else:
3376 proxies = compat_urllib_request.getproxies()
3377 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3378 if 'http' in proxies and 'https' not in proxies:
3379 proxies['https'] = proxies['http']
3380 proxy_handler = PerRequestProxyHandler(proxies)
3381
3382 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3383 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3384 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3385 redirect_handler = YoutubeDLRedirectHandler()
3386 data_handler = compat_urllib_request_DataHandler()
3387
3388 # When passing our own FileHandler instance, build_opener won't add the
3389 # default FileHandler and allows us to disable the file protocol, which
3390 # can be used for malicious purposes (see
3391 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3392 file_handler = compat_urllib_request.FileHandler()
3393
3394 def file_open(*args, **kwargs):
3395 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3396 file_handler.file_open = file_open
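        # With this override, any file:// request made through the opener
        # (e.g. self.urlopen('file:///etc/passwd')) raises URLError instead of
        # reading the local file.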
3397
3398 opener = compat_urllib_request.build_opener(
3399 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3400
3401 # Delete the default user-agent header, which would otherwise apply in
3402 # cases where our custom HTTP handler doesn't come into play
3403 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3404 opener.addheaders = []
3405 self._opener = opener
3406
3407 def encode(self, s):
3408 if isinstance(s, bytes):
3409 return s # Already encoded
3410
3411 try:
3412 return s.encode(self.get_encoding())
3413 except UnicodeEncodeError as err:
3414 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3415 raise
3416
3417 def get_encoding(self):
3418 encoding = self.params.get('encoding')
3419 if encoding is None:
3420 encoding = preferredencoding()
3421 return encoding
3422
3423 def _write_info_json(self, label, ie_result, infofn):
3424         ''' Write infojson and return True = written, False = skipped, None = error '''
3425 if not self.params.get('writeinfojson'):
3426 return False
3427 elif not infofn:
3428 self.write_debug(f'Skipping writing {label} infojson')
3429 return False
3430 elif not self._ensure_dir_exists(infofn):
3431 return None
3432 elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3433 self.to_screen(f'[info] {label.title()} metadata is already present')
3434 else:
3435 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3436 try:
3437 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3438 except (OSError, IOError):
3439 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3440 return None
3441 return True
3442
3443 def _write_description(self, label, ie_result, descfn):
3444         ''' Write description and return True = written, False = skipped, None = error '''
3445 if not self.params.get('writedescription'):
3446 return False
3447 elif not descfn:
3448 self.write_debug(f'Skipping writing {label} description')
3449 return False
3450 elif not self._ensure_dir_exists(descfn):
3451 return None
3452 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3453 self.to_screen(f'[info] {label.title()} description is already present')
3454 elif ie_result.get('description') is None:
3455 self.report_warning(f'There\'s no {label} description to write')
3456 return False
3457 else:
3458 try:
3459 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3460 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3461 descfile.write(ie_result['description'])
3462 except (OSError, IOError):
3463 self.report_error(f'Cannot write {label} description file {descfn}')
3464 return None
3465 return True
3466
3467 def _write_subtitles(self, info_dict, filename):
3468 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3469 ret = []
3470 subtitles = info_dict.get('requested_subtitles')
3471 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3472             # Subtitle download errors are already handled as trouble in the relevant IE,
3473             # so processing silently continues when the IE doesn't support subtitles
3474 return ret
3475
3476 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3477 if not sub_filename_base:
3478 self.to_screen('[info] Skipping writing video subtitles')
3479 return ret
3480 for sub_lang, sub_info in subtitles.items():
3481 sub_format = sub_info['ext']
3482 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3483 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3484 if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3485 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3486 sub_info['filepath'] = sub_filename
3487 ret.append((sub_filename, sub_filename_final))
3488 continue
3489
3490 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3491 if sub_info.get('data') is not None:
3492 try:
3493 # Use newline='' to prevent conversion of newline characters
3494 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3495 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3496 subfile.write(sub_info['data'])
3497 sub_info['filepath'] = sub_filename
3498 ret.append((sub_filename, sub_filename_final))
3499 continue
3500 except (OSError, IOError):
3501 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3502 return None
3503
3504 try:
3505 sub_copy = sub_info.copy()
3506 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3507 self.dl(sub_filename, sub_copy, subtitle=True)
3508 sub_info['filepath'] = sub_filename
3509 ret.append((sub_filename, sub_filename_final))
3510 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3511 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3512 continue
3513 return ret
3514
3515 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3516 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3517 write_all = self.params.get('write_all_thumbnails', False)
3518 thumbnails, ret = [], []
3519 if write_all or self.params.get('writethumbnail', False):
3520 thumbnails = info_dict.get('thumbnails') or []
3521 multiple = write_all and len(thumbnails) > 1
3522
3523 if thumb_filename_base is None:
3524 thumb_filename_base = filename
3525 if thumbnails and not thumb_filename_base:
3526 self.write_debug(f'Skipping writing {label} thumbnail')
3527 return ret
3528
3529 for t in thumbnails[::-1]:
3530 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3531 thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3532 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3533 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3534
3535 if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3536 ret.append((thumb_filename, thumb_filename_final))
3537 t['filepath'] = thumb_filename
3538 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
3539 else:
3540 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3541 try:
3542 uf = self.urlopen(t['url'])
3543 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3544 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3545 shutil.copyfileobj(uf, thumbf)
3546 ret.append((thumb_filename, thumb_filename_final))
3547 t['filepath'] = thumb_filename
3548 except network_exceptions as err:
3549 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3550 if ret and not write_all:
3551 break
3552 return ret