yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from string import ascii_letters
  31
  32 from .compat import (
  33     compat_basestring,
  34     compat_get_terminal_size,
  35     compat_kwargs,
  36     compat_numeric_types,
  37     compat_os_name,
  38     compat_pycrypto_AES,
  39     compat_shlex_quote,
  40     compat_str,
  41     compat_tokenize_tokenize,
  42     compat_urllib_error,
  43     compat_urllib_request,
  44     compat_urllib_request_DataHandler,
  45 )
  46 from .cookies import load_cookies
  47 from .utils import (
  48     age_restricted,
  49     args_to_str,
  50     ContentTooShortError,
  51     date_from_str,
  52     DateRange,
  53     DEFAULT_OUTTMPL,
  54     determine_ext,
  55     determine_protocol,
  56     DOT_DESKTOP_LINK_TEMPLATE,
  57     DOT_URL_LINK_TEMPLATE,
  58     DOT_WEBLOC_LINK_TEMPLATE,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     STR_FORMAT_RE_TMPL,
  71     STR_FORMAT_TYPES,
  72     formatSeconds,
  73     GeoRestrictedError,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     LazyList,
  79     locked_file,
  80     make_dir,
  81     make_HTTPS_handler,
  82     MaxDownloadsReached,
  83     network_exceptions,
  84     orderedSet,
  85     OUTTMPL_TYPES,
  86     PagedList,
  87     parse_filesize,
  88     PerRequestProxyHandler,
  89     platform_name,
  90     PostProcessingError,
  91     preferredencoding,
  92     prepend_extension,
  93     process_communicate_or_kill,
  94     register_socks_protocols,
  95     RejectedVideoReached,
  96     render_table,
  97     replace_extension,
  98     SameFileError,
  99     sanitize_filename,
 100     sanitize_path,
 101     sanitize_url,
 102     sanitized_Request,
 103     std_headers,
 104     str_or_none,
 105     strftime_or_none,
 106     subtitles_filename,
 107     ThrottledDownload,
 108     to_high_limit_path,
 109     traverse_obj,
 110     try_get,
 111     UnavailableVideoError,
 112     url_basename,
 113     variadic,
 114     version_tuple,
 115     write_json_file,
 116     write_string,
 117     YoutubeDLCookieProcessor,
 118     YoutubeDLHandler,
 119     YoutubeDLRedirectHandler,
 120 )
 121 from .cache import Cache
 122 from .extractor import (
 123     gen_extractor_classes,
 124     get_info_extractor,
 125     _LAZY_LOADER,
 126     _PLUGIN_CLASSES as plugin_extractors
 127 )
 128 from .extractor.openload import PhantomJSwrapper
 129 from .downloader import (
 130     FFmpegFD,
 131     get_suitable_downloader,
 132     shorten_protocol_name
 133 )
 134 from .downloader.rtmp import rtmpdump_version
 135 from .postprocessor import (
 136     get_postprocessor,
 137     FFmpegFixupDurationPP,
 138     FFmpegFixupM3u8PP,
 139     FFmpegFixupM4aPP,
 140     FFmpegFixupStretchedPP,
 141     FFmpegFixupTimestampPP,
 142     FFmpegMergerPP,
 143     FFmpegPostProcessor,
 144     MoveFilesAfterDownloadPP,
 145     _PLUGIN_CLASSES as plugin_postprocessors
 146 )
 147 from .update import detect_variant
 148 from .version import __version__
 149
 150 if compat_os_name == 'nt':
 151     import ctypes
 152
 153
 154 class YoutubeDL(object):
 155     """YoutubeDL class.
 156
 157     YoutubeDL objects are the ones responsible of downloading the
 158     actual video file and writing it to disk if the user has requested
 159     it, among some other tasks. In most cases there should be one per
 160     program. As, given a video URL, the downloader doesn't know how to
 161     extract all the needed information, task that InfoExtractors do, it
 162     has to pass the URL to one of them.
 163
 164     For this, YoutubeDL objects have a method that allows
 165     InfoExtractors to be registered in a given order. When it is passed
 166     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 167     finds that reports being able to handle it. The InfoExtractor extracts
 168     all the information about the video or videos the URL refers to, and
 169     YoutubeDL process the extracted information, possibly using a File
 170     Downloader to download the video.
 171
 172     YoutubeDL objects accept a lot of parameters. In order not to saturate
 173     the object constructor with arguments, it receives a dictionary of
 174     options instead. These options are available through the params
 175     attribute for the InfoExtractors to use. The YoutubeDL also
 176     registers itself as the downloader in charge for the InfoExtractors
 177     that are added to it, so this is a "mutual registration".
 178
 179     Available options:
 180
 181     username:          Username for authentication purposes.
 182     password:          Password for authentication purposes.
 183     videopassword:     Password for accessing a video.
 184     ap_mso:            Adobe Pass multiple-system operator identifier.
 185     ap_username:       Multiple-system operator account username.
 186     ap_password:       Multiple-system operator account password.
 187     usenetrc:          Use netrc for authentication instead.
 188     verbose:           Print additional info to stdout.
 189     quiet:             Do not print messages to stdout.
 190     no_warnings:       Do not print out anything for warnings.
 191     forceprint:        A list of templates to force print
 192     forceurl:          Force printing final URL. (Deprecated)
 193     forcetitle:        Force printing title. (Deprecated)
 194     forceid:           Force printing ID. (Deprecated)
 195     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 196     forcedescription:  Force printing description. (Deprecated)
 197     forcefilename:     Force printing final filename. (Deprecated)
 198     forceduration:     Force printing duration. (Deprecated)
 199     forcejson:         Force printing info_dict as JSON.
 200     dump_single_json:  Force printing the info_dict of the whole playlist
 201                        (or video) as a single JSON line.
 202     force_write_download_archive: Force writing download archive regardless
 203                        of 'skip_download' or 'simulate'.
 204     simulate:          Do not download the video files. If unset (or None),
 205                        simulate only if listsubtitles, listformats or list_thumbnails is used
 206     format:            Video format code. see "FORMAT SELECTION" for more details.
 207     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 208     ignore_no_formats_error: Ignore "No video formats" error. Usefull for
 209                        extracting metadata even if the video is not actually
 210                        available for download (experimental)
 211     format_sort:       How to sort the video formats. see "Sorting Formats"
 212                        for more details.
 213     format_sort_force: Force the given format_sort. see "Sorting Formats"
 214                        for more details.
 215     allow_multiple_video_streams:   Allow multiple video streams to be merged
 216                        into a single file
 217     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 218                        into a single file
 219     check_formats      Whether to test if the formats are downloadable.
 220                        Can be True (check all), False (check none)
 221                        or None (check only if requested by extractor)
 222     paths:             Dictionary of output paths. The allowed keys are 'home'
 223                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 224     outtmpl:           Dictionary of templates for output names. Allowed keys
 225                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 226                        For compatibility with youtube-dl, a single string can also be used
 227     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 228     restrictfilenames: Do not allow "&" and spaces in file names
 229     trim_file_name:    Limit length of filename (extension excluded)
 230     windowsfilenames:  Force the filenames to be windows compatible
 231     ignoreerrors:      Do not stop on download/postprocessing errors.
 232                        Can be 'only_download' to ignore only download errors.
 233                        Default is 'only_download' for CLI, but False for API
 234     skip_playlist_after_errors: Number of allowed failures until the rest of
 235                        the playlist is skipped
 236     force_generic_extractor: Force downloader to use the generic extractor
 237     overwrites:        Overwrite all video and metadata files if True,
 238                        overwrite only non-video files if None
 239                        and don't overwrite any file if False
 240                        For compatibility with youtube-dl,
 241                        "nooverwrites" may also be used instead
 242     playliststart:     Playlist item to start at.
 243     playlistend:       Playlist item to end at.
 244     playlist_items:    Specific indices of playlist to download.
 245     playlistreverse:   Download playlist items in reverse order.
 246     playlistrandom:    Download playlist items in random order.
 247     matchtitle:        Download only matching titles.
 248     rejecttitle:       Reject downloads for matching titles.
 249     logger:            Log messages to a logging.Logger instance.
 250     logtostderr:       Log messages to stderr instead of stdout.
 251     writedescription:  Write the video description to a .description file
 252     writeinfojson:     Write the video description to a .info.json file
 253     clean_infojson:    Remove private fields from the infojson
 254     getcomments:       Extract video comments. This will not be written to disk
 255                        unless writeinfojson is also given
 256     writeannotations:  Write the video annotations to a .annotations.xml file
 257     writethumbnail:    Write the thumbnail image to a file
 258     allow_playlist_files: Whether to write playlists' description, infojson etc
 259                        also to disk when using the 'write*' options
 260     write_all_thumbnails:  Write all thumbnail formats to files
 261     writelink:         Write an internet shortcut file, depending on the
 262                        current platform (.url/.webloc/.desktop)
 263     writeurllink:      Write a Windows internet shortcut file (.url)
 264     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 265     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 266     writesubtitles:    Write the video subtitles to a file
 267     writeautomaticsub: Write the automatically generated subtitles to a file
 268     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 269                        Downloads all the subtitles of the video
 270                        (requires writesubtitles or writeautomaticsub)
 271     listsubtitles:     Lists all available subtitles for the video
 272     subtitlesformat:   The format code for subtitles
 273     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 274                        The list may contain "all" to refer to all the available
 275                        subtitles. The language can be prefixed with a "-" to
 276                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 277     keepvideo:         Keep the video file after post-processing
 278     daterange:         A DateRange object, download only if the upload_date is in the range.
 279     skip_download:     Skip the actual download of the video file
 280     cachedir:          Location of the cache files in the filesystem.
 281                        False to disable filesystem cache.
 282     noplaylist:        Download single video instead of a playlist if in doubt.
 283     age_limit:         An integer representing the user's age in years.
 284                        Unsuitable videos for the given age are skipped.
 285     min_views:         An integer representing the minimum view count the video
 286                        must have in order to not be skipped.
 287                        Videos without view count information are always
 288                        downloaded. None for no limit.
 289     max_views:         An integer representing the maximum view count.
 290                        Videos that are more popular than that are not
 291                        downloaded.
 292                        Videos without view count information are always
 293                        downloaded. None for no limit.
 294     download_archive:  File name of a file where all downloads are recorded.
 295                        Videos already present in the file are not downloaded
 296                        again.
 297     break_on_existing: Stop the download process after attempting to download a
 298                        file that is in the archive.
 299     break_on_reject:   Stop the download process when encountering a video that
 300                        has been filtered out.
 301     cookiefile:        File name where cookies should be read from and dumped to
 302     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 303                        name/path from where cookies are loaded.
 304                        Eg: ('chrome', ) or (vivaldi, 'default')
 305     nocheckcertificate:Do not verify SSL certificates
 306     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 307                        At the moment, this is only supported by YouTube.
 308     proxy:             URL of the proxy server to use
 309     geo_verification_proxy:  URL of the proxy to use for IP address verification
 310                        on geo-restricted sites.
 311     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 312     bidi_workaround:   Work around buggy terminals without bidirectional text
 313                        support, using fridibi
 314     debug_printtraffic:Print out sent and received HTTP traffic
 315     include_ads:       Download ads as well
 316     default_search:    Prepend this string if an input url is not valid.
 317                        'auto' for elaborate guessing
 318     encoding:          Use this encoding instead of the system-specified.
 319     extract_flat:      Do not resolve URLs, return the immediate result.
 320                        Pass in 'in_playlist' to only show this behavior for
 321                        playlist items.
 322     postprocessors:    A list of dictionaries, each with an entry
 323                        * key:  The name of the postprocessor. See
 324                                yt_dlp/postprocessor/__init__.py for a list.
 325                        * when: When to run the postprocessor. Can be one of
 326                                pre_process|before_dl|post_process|after_move.
 327                                Assumed to be 'post_process' if not given
 328     post_hooks:        A list of functions that get called as the final step
 329                        for each video file, after all postprocessors have been
 330                        called. The filename will be passed as the only argument.
 331     progress_hooks:    A list of functions that get called on download
 332                        progress, with a dictionary with the entries
 333                        * status: One of "downloading", "error", or "finished".
 334                                  Check this first and ignore unknown values.
 335                        * info_dict: The extracted info_dict
 336
 337                        If status is one of "downloading", or "finished", the
 338                        following properties may also be present:
 339                        * filename: The final filename (always present)
 340                        * tmpfilename: The filename we're currently writing to
 341                        * downloaded_bytes: Bytes on disk
 342                        * total_bytes: Size of the whole file, None if unknown
 343                        * total_bytes_estimate: Guess of the eventual file size,
 344                                                None if unavailable.
 345                        * elapsed: The number of seconds since download started.
 346                        * eta: The estimated time in seconds, None if unknown
 347                        * speed: The download speed in bytes/second, None if
 348                                 unknown
 349                        * fragment_index: The counter of the currently
 350                                          downloaded video fragment.
 351                        * fragment_count: The number of fragments (= individual
 352                                          files that will be merged)
 353
 354                        Progress hooks are guaranteed to be called at least once
 355                        (with status "finished") if the download is successful.
 356     merge_output_format: Extension to use when merging formats.
 357     final_ext:         Expected final extension; used to detect when the file was
 358                        already downloaded and converted. "merge_output_format" is
 359                        replaced by this extension when given
 360     fixup:             Automatically correct known faults of the file.
 361                        One of:
 362                        - "never": do nothing
 363                        - "warn": only emit a warning
 364                        - "detect_or_warn": check whether we can do anything
 365                                            about it, warn otherwise (default)
 366     source_address:    Client-side IP address to bind to.
 367     call_home:         Boolean, true iff we are allowed to contact the
 368                        yt-dlp servers for debugging. (BROKEN)
 369     sleep_interval_requests: Number of seconds to sleep between requests
 370                        during extraction
 371     sleep_interval:    Number of seconds to sleep before each download when
 372                        used alone or a lower bound of a range for randomized
 373                        sleep before each download (minimum possible number
 374                        of seconds to sleep) when used along with
 375                        max_sleep_interval.
 376     max_sleep_interval:Upper bound of a range for randomized sleep before each
 377                        download (maximum possible number of seconds to sleep).
 378                        Must only be used along with sleep_interval.
 379                        Actual sleep time will be a random float from range
 380                        [sleep_interval; max_sleep_interval].
 381     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 382     listformats:       Print an overview of available video formats and exit.
 383     list_thumbnails:   Print a table of all thumbnails and exit.
 384     match_filter:      A function that gets called with the info_dict of
 385                        every video.
 386                        If it returns a message, the video is ignored.
 387                        If it returns None, the video is downloaded.
 388                        match_filter_func in utils.py is one example for this.
 389     no_color:          Do not emit color codes in output.
 390     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 391                        HTTP header
 392     geo_bypass_country:
 393                        Two-letter ISO 3166-2 country code that will be used for
 394                        explicit geographic restriction bypassing via faking
 395                        X-Forwarded-For HTTP header
 396     geo_bypass_ip_block:
 397                        IP range in CIDR notation that will be used similarly to
 398                        geo_bypass_country
 399
 400     The following options determine which downloader is picked:
 401     external_downloader: A dictionary of protocol keys and the executable of the
 402                        external downloader to use for it. The allowed protocols
 403                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 404                        Set the value to 'native' to use the native downloader
 405     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 406                        or {'m3u8': 'ffmpeg'} instead.
 407                        Use the native HLS downloader instead of ffmpeg/avconv
 408                        if True, otherwise use ffmpeg/avconv if False, otherwise
 409                        use downloader suggested by extractor if None.
 410     compat_opts:       Compatibility options. See "Differences in default behavior".
 411                        The following options do not work when used through the API:
 412                        filename, abort-on-error, multistreams, no-live-chat,
 413                        no-clean-infojson, no-playlist-metafiles, no-keep-subs.
 414                        Refer __init__.py for their implementation
 415
 416     The following parameters are not used by YoutubeDL itself, they are used by
 417     the downloader (see yt_dlp/downloader/common.py):
 418     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 419     max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
 420     xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
 421
 422     The following options are used by the post processors:
 423     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 424                        otherwise prefer ffmpeg. (avconv support is deprecated)
 425     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 426                        to the binary or its containing directory.
 427     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 428                        and a list of additional command-line arguments for the
 429                        postprocessor/executable. The dict can also have "PP+EXE" keys
 430                        which are used when the given exe is used by the given PP.
 431                        Use 'default' as the name for arguments to passed to all PP
 432                        For compatibility with youtube-dl, a single list of args
 433                        can also be used
 434
 435     The following options are used by the extractors:
 436     extractor_retries: Number of times to retry for known errors
 437     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 438     hls_split_discontinuity: Split HLS playlists to different formats at
 439                        discontinuities such as ad breaks (default: False)
 440     extractor_args:    A dictionary of arguments to be passed to the extractors.
 441                        See "EXTRACTOR ARGUMENTS" for details.
 442                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 443     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 444                        If True (default), DASH manifests and related
 445                        data will be downloaded and processed by extractor.
 446                        You can reduce network I/O by disabling it if you don't
 447                        care about DASH. (only for youtube)
 448     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 449                        If True (default), HLS manifests and related
 450                        data will be downloaded and processed by extractor.
 451                        You can reduce network I/O by disabling it if you don't
 452                        care about HLS. (only for youtube)
 453     """
 454
 455     _NUMERIC_FIELDS = set((
 456         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 457         'timestamp', 'release_timestamp',
 458         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 459         'average_rating', 'comment_count', 'age_limit',
 460         'start_time', 'end_time',
 461         'chapter_number', 'season_number', 'episode_number',
 462         'track_number', 'disc_number', 'release_year',
 463     ))
 464
 465     params = None
 466     _ies = {}
 467     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 468     _printed_messages = set()
 469     _first_webpage_request = True
 470     _download_retcode = None
 471     _num_downloads = None
 472     _playlist_level = 0
 473     _playlist_urls = set()
 474     _screen_file = None
 475
 476     def __init__(self, params=None, auto_init=True):
 477         """Create a FileDownloader object with the given options."""
 478         if params is None:
 479             params = {}
 480         self._ies = {}
 481         self._ies_instances = {}
 482         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 483         self._printed_messages = set()
 484         self._first_webpage_request = True
 485         self._post_hooks = []
 486         self._progress_hooks = []
 487         self._download_retcode = 0
 488         self._num_downloads = 0
 489         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 490         self._err_file = sys.stderr
 491         self.params = {
 492             # Default parameters
 493             'nocheckcertificate': False,
 494         }
 495         self.params.update(params)
 496         self.cache = Cache(self)
 497
 498         if sys.version_info < (3, 6):
 499             self.report_warning(
 500                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 501
 502         if self.params.get('allow_unplayable_formats'):
 503             self.report_warning(
 504                 'You have asked for unplayable formats to be listed/downloaded. '
 505                 'This is a developer option intended for debugging. '
 506                 'If you experience any issues while using this option, DO NOT open a bug report')
 507
 508         def check_deprecated(param, option, suggestion):
 509             if self.params.get(param) is not None:
 510                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 511                 return True
 512             return False
 513
 514         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 515             if self.params.get('geo_verification_proxy') is None:
 516                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 517
 518         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 519         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 520         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 521
 522         for msg in self.params.get('warnings', []):
 523             self.report_warning(msg)
 524
 525         if self.params.get('overwrites') is None:
 526             self.params.pop('overwrites', None)
 527         elif self.params.get('nooverwrites') is not None:
 528             # nooverwrites was unnecessarily changed to overwrites
 529             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 530             # This ensures compatibility with both keys
 531             self.params['overwrites'] = not self.params['nooverwrites']
 532         else:
 533             self.params['nooverwrites'] = not self.params['overwrites']
 534
 535         if params.get('bidi_workaround', False):
 536             try:
 537                 import pty
 538                 master, slave = pty.openpty()
 539                 width = compat_get_terminal_size().columns
 540                 if width is None:
 541                     width_args = []
 542                 else:
 543                     width_args = ['-w', str(width)]
 544                 sp_kwargs = dict(
 545                     stdin=subprocess.PIPE,
 546                     stdout=slave,
 547                     stderr=self._err_file)
 548                 try:
 549                     self._output_process = subprocess.Popen(
 550                         ['bidiv'] + width_args, **sp_kwargs
 551                     )
 552                 except OSError:
 553                     self._output_process = subprocess.Popen(
 554                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 555                 self._output_channel = os.fdopen(master, 'rb')
 556             except OSError as ose:
 557                 if ose.errno == errno.ENOENT:
 558                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 559                 else:
 560                     raise
 561
 562         if (sys.platform != 'win32'
 563                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 564                 and not params.get('restrictfilenames', False)):
 565             # Unicode filesystem API will throw errors (#1474, #13027)
 566             self.report_warning(
 567                 'Assuming --restrict-filenames since file system encoding '
 568                 'cannot encode all characters. '
 569                 'Set the LC_ALL environment variable to fix this.')
 570             self.params['restrictfilenames'] = True
 571
 572         self.outtmpl_dict = self.parse_outtmpl()
 573
 574         # Creating format selector here allows us to catch syntax errors before the extraction
 575         self.format_selector = (
 576             None if self.params.get('format') is None
 577             else self.build_format_selector(self.params['format']))
 578
 579         self._setup_opener()
 580
 581         def preload_download_archive(fn):
 582             """Preload the archive, if any is specified"""
 583             if fn is None:
 584                 return False
 585             self.write_debug('Loading archive file %r\n' % fn)
 586             try:
 587                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 588                     for line in archive_file:
 589                         self.archive.add(line.strip())
 590             except IOError as ioe:
 591                 if ioe.errno != errno.ENOENT:
 592                     raise
 593                 return False
 594             return True
 595
 596         self.archive = set()
 597         preload_download_archive(self.params.get('download_archive'))
 598
 599         if auto_init:
 600             self.print_debug_header()
 601             self.add_default_info_extractors()
 602
 603         for pp_def_raw in self.params.get('postprocessors', []):
 604             pp_def = dict(pp_def_raw)
 605             when = pp_def.pop('when', 'post_process')
 606             pp_class = get_postprocessor(pp_def.pop('key'))
 607             pp = pp_class(self, **compat_kwargs(pp_def))
 608             self.add_post_processor(pp, when=when)
 609
 610         for ph in self.params.get('post_hooks', []):
 611             self.add_post_hook(ph)
 612
 613         for ph in self.params.get('progress_hooks', []):
 614             self.add_progress_hook(ph)
 615
 616         register_socks_protocols()
 617
 618     def warn_if_short_id(self, argv):
 619         # short YouTube ID starting with dash?
 620         idxs = [
 621             i for i, a in enumerate(argv)
 622             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 623         if idxs:
 624             correct_argv = (
 625                 ['yt-dlp']
 626                 + [a for i, a in enumerate(argv) if i not in idxs]
 627                 + ['--'] + [argv[i] for i in idxs]
 628             )
 629             self.report_warning(
 630                 'Long argument string detected. '
 631                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 632                 args_to_str(correct_argv))
 633
 634     def add_info_extractor(self, ie):
 635         """Add an InfoExtractor object to the end of the list."""
 636         ie_key = ie.ie_key()
 637         self._ies[ie_key] = ie
 638         if not isinstance(ie, type):
 639             self._ies_instances[ie_key] = ie
 640             ie.set_downloader(self)
 641
 642     def _get_info_extractor_class(self, ie_key):
 643         ie = self._ies.get(ie_key)
 644         if ie is None:
 645             ie = get_info_extractor(ie_key)
 646             self.add_info_extractor(ie)
 647         return ie
 648
 649     def get_info_extractor(self, ie_key):
 650         """
 651         Get an instance of an IE with name ie_key, it will try to get one from
 652         the _ies list, if there's no instance it will create a new one and add
 653         it to the extractor list.
 654         """
 655         ie = self._ies_instances.get(ie_key)
 656         if ie is None:
 657             ie = get_info_extractor(ie_key)()
 658             self.add_info_extractor(ie)
 659         return ie
 660
 661     def add_default_info_extractors(self):
 662         """
 663         Add the InfoExtractors returned by gen_extractors to the end of the list
 664         """
 665         for ie in gen_extractor_classes():
 666             self.add_info_extractor(ie)
 667
 668     def add_post_processor(self, pp, when='post_process'):
 669         """Add a PostProcessor object to the end of the chain."""
 670         self._pps[when].append(pp)
 671         pp.set_downloader(self)
 672
 673     def add_post_hook(self, ph):
 674         """Add the post hook"""
 675         self._post_hooks.append(ph)
 676
 677     def add_progress_hook(self, ph):
 678         """Add the progress hook (currently only for the file downloader)"""
 679         self._progress_hooks.append(ph)
 680
 681     def _bidi_workaround(self, message):
 682         if not hasattr(self, '_output_channel'):
 683             return message
 684
 685         assert hasattr(self, '_output_process')
 686         assert isinstance(message, compat_str)
 687         line_count = message.count('\n') + 1
 688         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 689         self._output_process.stdin.flush()
 690         res = ''.join(self._output_channel.readline().decode('utf-8')
 691                       for _ in range(line_count))
 692         return res[:-len('\n')]
 693
 694     def _write_string(self, message, out=None, only_once=False):
 695         if only_once:
 696             if message in self._printed_messages:
 697                 return
 698             self._printed_messages.add(message)
 699         write_string(message, out=out, encoding=self.params.get('encoding'))
 700
 701     def to_stdout(self, message, skip_eol=False, quiet=False):
 702         """Print message to stdout"""
 703         if self.params.get('logger'):
 704             self.params['logger'].debug(message)
 705         elif not quiet or self.params.get('verbose'):
 706             self._write_string(
 707                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 708                 self._err_file if quiet else self._screen_file)
 709
 710     def to_stderr(self, message, only_once=False):
 711         """Print message to stderr"""
 712         assert isinstance(message, compat_str)
 713         if self.params.get('logger'):
 714             self.params['logger'].error(message)
 715         else:
 716             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 717
 718     def to_console_title(self, message):
 719         if not self.params.get('consoletitle', False):
 720             return
 721         if compat_os_name == 'nt':
 722             if ctypes.windll.kernel32.GetConsoleWindow():
 723                 # c_wchar_p() might not be necessary if `message` is
 724                 # already of type unicode()
 725                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 726         elif 'TERM' in os.environ:
 727             self._write_string('\033]0;%s\007' % message, self._screen_file)
 728
 729     def save_console_title(self):
 730         if not self.params.get('consoletitle', False):
 731             return
 732         if self.params.get('simulate'):
 733             return
 734         if compat_os_name != 'nt' and 'TERM' in os.environ:
 735             # Save the title on stack
 736             self._write_string('\033[22;0t', self._screen_file)
 737
 738     def restore_console_title(self):
 739         if not self.params.get('consoletitle', False):
 740             return
 741         if self.params.get('simulate'):
 742             return
 743         if compat_os_name != 'nt' and 'TERM' in os.environ:
 744             # Restore the title from stack
 745             self._write_string('\033[23;0t', self._screen_file)
 746
 747     def __enter__(self):
 748         self.save_console_title()
 749         return self
 750
 751     def __exit__(self, *args):
 752         self.restore_console_title()
 753
 754         if self.params.get('cookiefile') is not None:
 755             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 756
 757     def trouble(self, message=None, tb=None):
 758         """Determine action to take when a download problem appears.
 759
 760         Depending on if the downloader has been configured to ignore
 761         download errors or not, this method may throw an exception or
 762         not when errors are found, after printing the message.
 763
 764         tb, if given, is additional traceback information.
 765         """
 766         if message is not None:
 767             self.to_stderr(message)
 768         if self.params.get('verbose'):
 769             if tb is None:
 770                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 771                     tb = ''
 772                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 773                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 774                     tb += encode_compat_str(traceback.format_exc())
 775                 else:
 776                     tb_data = traceback.format_list(traceback.extract_stack())
 777                     tb = ''.join(tb_data)
 778             if tb:
 779                 self.to_stderr(tb)
 780         if not self.params.get('ignoreerrors'):
 781             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 782                 exc_info = sys.exc_info()[1].exc_info
 783             else:
 784                 exc_info = sys.exc_info()
 785             raise DownloadError(message, exc_info)
 786         self._download_retcode = 1
 787
 788     def to_screen(self, message, skip_eol=False):
 789         """Print message to stdout if not in quiet mode"""
 790         self.to_stdout(
 791             message, skip_eol, quiet=self.params.get('quiet', False))
 792
 793     def report_warning(self, message, only_once=False):
 794         '''
 795         Print the message to stderr, it will be prefixed with 'WARNING:'
 796         If stderr is a tty file the 'WARNING:' will be colored
 797         '''
 798         if self.params.get('logger') is not None:
 799             self.params['logger'].warning(message)
 800         else:
 801             if self.params.get('no_warnings'):
 802                 return
 803             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 804                 _msg_header = '\033[0;33mWARNING:\033[0m'
 805             else:
 806                 _msg_header = 'WARNING:'
 807             warning_message = '%s %s' % (_msg_header, message)
 808             self.to_stderr(warning_message, only_once)
 809
 810     def report_error(self, message, tb=None):
 811         '''
 812         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 813         in red if stderr is a tty file.
 814         '''
 815         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 816             _msg_header = '\033[0;31mERROR:\033[0m'
 817         else:
 818             _msg_header = 'ERROR:'
 819         error_message = '%s %s' % (_msg_header, message)
 820         self.trouble(error_message, tb)
 821
 822     def write_debug(self, message, only_once=False):
 823         '''Log debug message or Print message to stderr'''
 824         if not self.params.get('verbose', False):
 825             return
 826         message = '[debug] %s' % message
 827         if self.params.get('logger'):
 828             self.params['logger'].debug(message)
 829         else:
 830             self.to_stderr(message, only_once)
 831
 832     def report_file_already_downloaded(self, file_name):
 833         """Report file has already been fully downloaded."""
 834         try:
 835             self.to_screen('[download] %s has already been downloaded' % file_name)
 836         except UnicodeEncodeError:
 837             self.to_screen('[download] The file has already been downloaded')
 838
 839     def report_file_delete(self, file_name):
 840         """Report that existing file will be deleted."""
 841         try:
 842             self.to_screen('Deleting existing file %s' % file_name)
 843         except UnicodeEncodeError:
 844             self.to_screen('Deleting existing file')
 845
 846     def raise_no_formats(self, info, forced=False):
 847         has_drm = info.get('__has_drm')
 848         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 849         expected = self.params.get('ignore_no_formats_error')
 850         if forced or not expected:
 851             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 852                                  expected=has_drm or expected)
 853         else:
 854             self.report_warning(msg)
 855
 856     def parse_outtmpl(self):
 857         outtmpl_dict = self.params.get('outtmpl', {})
 858         if not isinstance(outtmpl_dict, dict):
 859             outtmpl_dict = {'default': outtmpl_dict}
 860         outtmpl_dict.update({
 861             k: v for k, v in DEFAULT_OUTTMPL.items()
 862             if outtmpl_dict.get(k) is None})
 863         for key, val in outtmpl_dict.items():
 864             if isinstance(val, bytes):
 865                 self.report_warning(
 866                     'Parameter outtmpl is bytes, but should be a unicode string. '
 867                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 868         return outtmpl_dict
 869
 870     def get_output_path(self, dir_type='', filename=None):
 871         paths = self.params.get('paths', {})
 872         assert isinstance(paths, dict)
 873         path = os.path.join(
 874             expand_path(paths.get('home', '').strip()),
 875             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 876             filename or '')
 877
 878         # Temporary fix for #4787
 879         # 'Treat' all problem characters by passing filename through preferredencoding
 880         # to workaround encoding issues with subprocess on python2 @ Windows
 881         if sys.version_info < (3, 0) and sys.platform == 'win32':
 882             path = encodeFilename(path, True).decode(preferredencoding())
 883         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 884
 885     @staticmethod
 886     def _outtmpl_expandpath(outtmpl):
 887         # expand_path translates '%%' into '%' and '$$' into '$'
 888         # correspondingly that is not what we want since we need to keep
 889         # '%%' intact for template dict substitution step. Working around
 890         # with boundary-alike separator hack.
 891         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 892         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 893
 894         # outtmpl should be expand_path'ed before template dict substitution
 895         # because meta fields may contain env variables we don't want to
 896         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 897         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 898         return expand_path(outtmpl).replace(sep, '')
 899
 900     @staticmethod
 901     def escape_outtmpl(outtmpl):
 902         ''' Escape any remaining strings like %s, %abc% etc. '''
 903         return re.sub(
 904             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 905             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 906             outtmpl)
 907
 908     @classmethod
 909     def validate_outtmpl(cls, outtmpl):
 910         ''' @return None or Exception object '''
 911         outtmpl = re.sub(
 912             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
 913             lambda mobj: f'{mobj.group(0)[:-1]}s',
 914             cls._outtmpl_expandpath(outtmpl))
 915         try:
 916             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
 917             return None
 918         except ValueError as err:
 919             return err
 920
 921     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 922         """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
 923         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
 924
 925         info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
 926         for key in ('__original_infodict', '__postprocessors'):
 927             info_dict.pop(key, None)
 928         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 929             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
 930             if info_dict.get('duration', None) is not None
 931             else None)
 932         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 933         if info_dict.get('resolution') is None:
 934             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
 935
 936         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
 937         # of %(field)s to %(field)0Nd for backward compatibility
 938         field_size_compat_map = {
 939             'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
 940             'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
 941             'autonumber': self.params.get('autonumber_size') or 5,
 942         }
 943
 944         TMPL_DICT = {}
 945         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
 946         MATH_FUNCTIONS = {
 947             '+': float.__add__,
 948             '-': float.__sub__,
 949         }
 950         # Field is of the form key1.key2...
 951         # where keys (except first) can be string, int or slice
 952         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
 953         MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
 954         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
 955         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
 956             (?P<negate>-)?
 957             (?P<fields>{field})
 958             (?P<maths>(?:{math_op}{math_field})*)
 959             (?:>(?P<strf_format>.+?))?
 960             (?P<alternate>(?<!\\),[^|)]+)?
 961             (?:\|(?P<default>.*?))?
 962             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
 963
 964         def _traverse_infodict(k):
 965             k = k.split('.')
 966             if k[0] == '':
 967                 k.pop(0)
 968             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
 969
 970         def get_value(mdict):
 971             # Object traversal
 972             value = _traverse_infodict(mdict['fields'])
 973             # Negative
 974             if mdict['negate']:
 975                 value = float_or_none(value)
 976                 if value is not None:
 977                     value *= -1
 978             # Do maths
 979             offset_key = mdict['maths']
 980             if offset_key:
 981                 value = float_or_none(value)
 982                 operator = None
 983                 while offset_key:
 984                     item = re.match(
 985                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
 986                         offset_key).group(0)
 987                     offset_key = offset_key[len(item):]
 988                     if operator is None:
 989                         operator = MATH_FUNCTIONS[item]
 990                         continue
 991                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
 992                     offset = float_or_none(item)
 993                     if offset is None:
 994                         offset = float_or_none(_traverse_infodict(item))
 995                     try:
 996                         value = operator(value, multiplier * offset)
 997                     except (TypeError, ZeroDivisionError):
 998                         return None
 999                     operator = None
1000             # Datetime formatting
1001             if mdict['strf_format']:
1002                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1003
1004             return value
1005
1006         na = self.params.get('outtmpl_na_placeholder', 'NA')
1007
1008         def _dumpjson_default(obj):
1009             if isinstance(obj, (set, LazyList)):
1010                 return list(obj)
1011             raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
1012
1013         def create_key(outer_mobj):
1014             if not outer_mobj.group('has_key'):
1015                 return f'%{outer_mobj.group(0)}'
1016             key = outer_mobj.group('key')
1017             mobj = re.match(INTERNAL_FORMAT_RE, key)
1018             initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
1019             value, default = None, na
1020             while mobj:
1021                 mobj = mobj.groupdict()
1022                 default = mobj['default'] if mobj['default'] is not None else default
1023                 value = get_value(mobj)
1024                 if value is None and mobj['alternate']:
1025                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1026                 else:
1027                     break
1028
1029             fmt = outer_mobj.group('format')
1030             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1031                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1032
1033             value = default if value is None else value
1034
1035             str_fmt = f'{fmt[:-1]}s'
1036             if fmt[-1] == 'l':  # list
1037                 delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
1038                 value, fmt = delim.join(variadic(value)), str_fmt
1039             elif fmt[-1] == 'j':  # json
1040                 value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
1041             elif fmt[-1] == 'q':  # quoted
1042                 value, fmt = compat_shlex_quote(str(value)), str_fmt
1043             elif fmt[-1] == 'B':  # bytes
1044                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1045                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1046             elif fmt[-1] == 'U':  # unicode normalized
1047                 opts = outer_mobj.group('conversion') or ''
1048                 value, fmt = unicodedata.normalize(
1049                     # "+" = compatibility equivalence, "#" = NFD
1050                     'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
1051                     value), str_fmt
1052             elif fmt[-1] == 'c':
1053                 if value:
1054                     value = str(value)[0]
1055                 else:
1056                     fmt = str_fmt
1057             elif fmt[-1] not in 'rs':  # numeric
1058                 value = float_or_none(value)
1059                 if value is None:
1060                     value, fmt = default, 's'
1061
1062             if sanitize:
1063                 if fmt[-1] == 'r':
1064                     # If value is an object, sanitize might convert it to a string
1065                     # So we convert it to repr first
1066                     value, fmt = repr(value), str_fmt
1067                 if fmt[-1] in 'csr':
1068                     value = sanitize(initial_field, value)
1069
1070             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1071             TMPL_DICT[key] = value
1072             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1073
1074         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1075
1076     def _prepare_filename(self, info_dict, tmpl_type='default'):
1077         try:
1078             sanitize = lambda k, v: sanitize_filename(
1079                 compat_str(v),
1080                 restricted=self.params.get('restrictfilenames'),
1081                 is_id=(k == 'id' or k.endswith('_id')))
1082             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
1083             outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
1084             outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
1085             filename = outtmpl % template_dict
1086
1087             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1088             if filename and force_ext is not None:
1089                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1090
1091             # https://github.com/blackjack4494/youtube-dlc/issues/85
1092             trim_file_name = self.params.get('trim_file_name', False)
1093             if trim_file_name:
1094                 fn_groups = filename.rsplit('.')
1095                 ext = fn_groups[-1]
1096                 sub_ext = ''
1097                 if len(fn_groups) > 2:
1098                     sub_ext = fn_groups[-2]
1099                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1100
1101             return filename
1102         except ValueError as err:
1103             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1104             return None
1105
1106     def prepare_filename(self, info_dict, dir_type='', warn=False):
1107         """Generate the output filename."""
1108
1109         filename = self._prepare_filename(info_dict, dir_type or 'default')
1110         if not filename and dir_type not in ('', 'temp'):
1111             return ''
1112
1113         if warn:
1114             if not self.params.get('paths'):
1115                 pass
1116             elif filename == '-':
1117                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1118             elif os.path.isabs(filename):
1119                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1120         if filename == '-' or not filename:
1121             return filename
1122
1123         return self.get_output_path(dir_type, filename)
1124
1125     def _match_entry(self, info_dict, incomplete=False, silent=False):
1126         """ Returns None if the file should be downloaded """
1127
1128         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1129
1130         def check_filter():
1131             if 'title' in info_dict:
1132                 # This can happen when we're just evaluating the playlist
1133                 title = info_dict['title']
1134                 matchtitle = self.params.get('matchtitle', False)
1135                 if matchtitle:
1136                     if not re.search(matchtitle, title, re.IGNORECASE):
1137                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1138                 rejecttitle = self.params.get('rejecttitle', False)
1139                 if rejecttitle:
1140                     if re.search(rejecttitle, title, re.IGNORECASE):
1141                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1142             date = info_dict.get('upload_date')
1143             if date is not None:
1144                 dateRange = self.params.get('daterange', DateRange())
1145                 if date not in dateRange:
1146                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1147             view_count = info_dict.get('view_count')
1148             if view_count is not None:
1149                 min_views = self.params.get('min_views')
1150                 if min_views is not None and view_count < min_views:
1151                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1152                 max_views = self.params.get('max_views')
1153                 if max_views is not None and view_count > max_views:
1154                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1155             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1156                 return 'Skipping "%s" because it is age restricted' % video_title
1157
1158             match_filter = self.params.get('match_filter')
1159             if match_filter is not None:
1160                 try:
1161                     ret = match_filter(info_dict, incomplete=incomplete)
1162                 except TypeError:
1163                     # For backward compatibility
1164                     ret = None if incomplete else match_filter(info_dict)
1165                 if ret is not None:
1166                     return ret
1167             return None
1168
1169         if self.in_download_archive(info_dict):
1170             reason = '%s has already been recorded in the archive' % video_title
1171             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1172         else:
1173             reason = check_filter()
1174             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1175         if reason is not None:
1176             if not silent:
1177                 self.to_screen('[download] ' + reason)
1178             if self.params.get(break_opt, False):
1179                 raise break_err()
1180         return reason
1181
1182     @staticmethod
1183     def add_extra_info(info_dict, extra_info):
1184         '''Set the keys from extra_info in info dict if they are missing'''
1185         for key, value in extra_info.items():
1186             info_dict.setdefault(key, value)
1187
1188     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1189                      process=True, force_generic_extractor=False):
1190         """
1191         Return a list with a dictionary for each video extracted.
1192
1193         Arguments:
1194         url -- URL to extract
1195
1196         Keyword arguments:
1197         download -- whether to download videos during extraction
1198         ie_key -- extractor key hint
1199         extra_info -- dictionary containing the extra values to add to each result
1200         process -- whether to resolve all unresolved references (URLs, playlist items),
1201             must be True for download to work.
1202         force_generic_extractor -- force using the generic extractor
1203         """
1204
1205         if extra_info is None:
1206             extra_info = {}
1207
1208         if not ie_key and force_generic_extractor:
1209             ie_key = 'Generic'
1210
1211         if ie_key:
1212             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1213         else:
1214             ies = self._ies
1215
1216         for ie_key, ie in ies.items():
1217             if not ie.suitable(url):
1218                 continue
1219
1220             if not ie.working():
1221                 self.report_warning('The program functionality for this site has been marked as broken, '
1222                                     'and will probably not work.')
1223
1224             temp_id = ie.get_temp_id(url)
1225             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1226                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1227                                ie_key, temp_id))
1228                 break
1229             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1230         else:
1231             self.report_error('no suitable InfoExtractor for URL %s' % url)
1232
1233     def __handle_extraction_exceptions(func):
1234
1235         def wrapper(self, *args, **kwargs):
1236             try:
1237                 return func(self, *args, **kwargs)
1238             except GeoRestrictedError as e:
1239                 msg = e.msg
1240                 if e.countries:
1241                     msg += '\nThis video is available in %s.' % ', '.join(
1242                         map(ISO3166Utils.short2full, e.countries))
1243                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1244                 self.report_error(msg)
1245             except ExtractorError as e:  # An error we somewhat expected
1246                 self.report_error(compat_str(e), e.format_traceback())
1247             except ThrottledDownload:
1248                 self.to_stderr('\r')
1249                 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1250                 return wrapper(self, *args, **kwargs)
1251             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
1252                 raise
1253             except Exception as e:
1254                 if self.params.get('ignoreerrors'):
1255                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1256                 else:
1257                     raise
1258         return wrapper
1259
1260     @__handle_extraction_exceptions
1261     def __extract_info(self, url, ie, download, extra_info, process):
1262         ie_result = ie.extract(url)
1263         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1264             return
1265         if isinstance(ie_result, list):
1266             # Backwards compatibility: old IE result format
1267             ie_result = {
1268                 '_type': 'compat_list',
1269                 'entries': ie_result,
1270             }
1271         if extra_info.get('original_url'):
1272             ie_result.setdefault('original_url', extra_info['original_url'])
1273         self.add_default_extra_info(ie_result, ie, url)
1274         if process:
1275             return self.process_ie_result(ie_result, download, extra_info)
1276         else:
1277             return ie_result
1278
1279     def add_default_extra_info(self, ie_result, ie, url):
1280         if url is not None:
1281             self.add_extra_info(ie_result, {
1282                 'webpage_url': url,
1283                 'original_url': url,
1284                 'webpage_url_basename': url_basename(url),
1285             })
1286         if ie is not None:
1287             self.add_extra_info(ie_result, {
1288                 'extractor': ie.IE_NAME,
1289                 'extractor_key': ie.ie_key(),
1290             })
1291
1292     def process_ie_result(self, ie_result, download=True, extra_info=None):
1293         """
1294         Take the result of the ie(may be modified) and resolve all unresolved
1295         references (URLs, playlist items).
1296
1297         It will also download the videos if 'download'.
1298         Returns the resolved ie_result.
1299         """
1300         if extra_info is None:
1301             extra_info = {}
1302         result_type = ie_result.get('_type', 'video')
1303
1304         if result_type in ('url', 'url_transparent'):
1305             ie_result['url'] = sanitize_url(ie_result['url'])
1306             if ie_result.get('original_url'):
1307                 extra_info.setdefault('original_url', ie_result['original_url'])
1308
1309             extract_flat = self.params.get('extract_flat', False)
1310             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1311                     or extract_flat is True):
1312                 info_copy = ie_result.copy()
1313                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1314                 if ie and not ie_result.get('id'):
1315                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1316                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1317                 self.add_extra_info(info_copy, extra_info)
1318                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1319                 if self.params.get('force_write_download_archive', False):
1320                     self.record_download_archive(info_copy)
1321                 return ie_result
1322
1323         if result_type == 'video':
1324             self.add_extra_info(ie_result, extra_info)
1325             ie_result = self.process_video_result(ie_result, download=download)
1326             additional_urls = (ie_result or {}).get('additional_urls')
1327             if additional_urls:
1328                 # TODO: Improve MetadataParserPP to allow setting a list
1329                 if isinstance(additional_urls, compat_str):
1330                     additional_urls = [additional_urls]
1331                 self.to_screen(
1332                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1333                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1334                 ie_result['additional_entries'] = [
1335                     self.extract_info(
1336                         url, download, extra_info,
1337                         force_generic_extractor=self.params.get('force_generic_extractor'))
1338                     for url in additional_urls
1339                 ]
1340             return ie_result
1341         elif result_type == 'url':
1342             # We have to add extra_info to the results because it may be
1343             # contained in a playlist
1344             return self.extract_info(
1345                 ie_result['url'], download,
1346                 ie_key=ie_result.get('ie_key'),
1347                 extra_info=extra_info)
1348         elif result_type == 'url_transparent':
1349             # Use the information from the embedding page
1350             info = self.extract_info(
1351                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1352                 extra_info=extra_info, download=False, process=False)
1353
1354             # extract_info may return None when ignoreerrors is enabled and
1355             # extraction failed with an error, don't crash and return early
1356             # in this case
1357             if not info:
1358                 return info
1359
1360             force_properties = dict(
1361                 (k, v) for k, v in ie_result.items() if v is not None)
1362             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1363                 if f in force_properties:
1364                     del force_properties[f]
1365             new_result = info.copy()
1366             new_result.update(force_properties)
1367
1368             # Extracted info may not be a video result (i.e.
1369             # info.get('_type', 'video') != video) but rather an url or
1370             # url_transparent. In such cases outer metadata (from ie_result)
1371             # should be propagated to inner one (info). For this to happen
1372             # _type of info should be overridden with url_transparent. This
1373             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1374             if new_result.get('_type') == 'url':
1375                 new_result['_type'] = 'url_transparent'
1376
1377             return self.process_ie_result(
1378                 new_result, download=download, extra_info=extra_info)
1379         elif result_type in ('playlist', 'multi_video'):
1380             # Protect from infinite recursion due to recursively nested playlists
1381             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1382             webpage_url = ie_result['webpage_url']
1383             if webpage_url in self._playlist_urls:
1384                 self.to_screen(
1385                     '[download] Skipping already downloaded playlist: %s'
1386                     % ie_result.get('title') or ie_result.get('id'))
1387                 return
1388
1389             self._playlist_level += 1
1390             self._playlist_urls.add(webpage_url)
1391             self._sanitize_thumbnails(ie_result)
1392             try:
1393                 return self.__process_playlist(ie_result, download)
1394             finally:
1395                 self._playlist_level -= 1
1396                 if not self._playlist_level:
1397                     self._playlist_urls.clear()
1398         elif result_type == 'compat_list':
1399             self.report_warning(
1400                 'Extractor %s returned a compat_list result. '
1401                 'It needs to be updated.' % ie_result.get('extractor'))
1402
1403             def _fixup(r):
1404                 self.add_extra_info(r, {
1405                     'extractor': ie_result['extractor'],
1406                     'webpage_url': ie_result['webpage_url'],
1407                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1408                     'extractor_key': ie_result['extractor_key'],
1409                 })
1410                 return r
1411             ie_result['entries'] = [
1412                 self.process_ie_result(_fixup(r), download, extra_info)
1413                 for r in ie_result['entries']
1414             ]
1415             return ie_result
1416         else:
1417             raise Exception('Invalid result type: %s' % result_type)
1418
1419     def _ensure_dir_exists(self, path):
1420         return make_dir(path, self.report_error)
1421
1422     def __process_playlist(self, ie_result, download):
1423         # We process each entry in the playlist
1424         playlist = ie_result.get('title') or ie_result.get('id')
1425         self.to_screen('[download] Downloading playlist: %s' % playlist)
1426
1427         if 'entries' not in ie_result:
1428             raise EntryNotInPlaylist()
1429         incomplete_entries = bool(ie_result.get('requested_entries'))
1430         if incomplete_entries:
1431             def fill_missing_entries(entries, indexes):
1432                 ret = [None] * max(*indexes)
1433                 for i, entry in zip(indexes, entries):
1434                     ret[i - 1] = entry
1435                 return ret
1436             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1437
1438         playlist_results = []
1439
1440         playliststart = self.params.get('playliststart', 1)
1441         playlistend = self.params.get('playlistend')
1442         # For backwards compatibility, interpret -1 as whole list
1443         if playlistend == -1:
1444             playlistend = None
1445
1446         playlistitems_str = self.params.get('playlist_items')
1447         playlistitems = None
1448         if playlistitems_str is not None:
1449             def iter_playlistitems(format):
1450                 for string_segment in format.split(','):
1451                     if '-' in string_segment:
1452                         start, end = string_segment.split('-')
1453                         for item in range(int(start), int(end) + 1):
1454                             yield int(item)
1455                     else:
1456                         yield int(string_segment)
1457             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1458
1459         ie_entries = ie_result['entries']
1460         msg = (
1461             'Downloading %d videos' if not isinstance(ie_entries, list)
1462             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1463
1464         if isinstance(ie_entries, list):
1465             def get_entry(i):
1466                 return ie_entries[i - 1]
1467         else:
1468             if not isinstance(ie_entries, PagedList):
1469                 ie_entries = LazyList(ie_entries)
1470
1471             def get_entry(i):
1472                 return YoutubeDL.__handle_extraction_exceptions(
1473                     lambda self, i: ie_entries[i - 1]
1474                 )(self, i)
1475
1476         entries = []
1477         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1478         for i in items:
1479             if i == 0:
1480                 continue
1481             if playlistitems is None and playlistend is not None and playlistend < i:
1482                 break
1483             entry = None
1484             try:
1485                 entry = get_entry(i)
1486                 if entry is None:
1487                     raise EntryNotInPlaylist()
1488             except (IndexError, EntryNotInPlaylist):
1489                 if incomplete_entries:
1490                     raise EntryNotInPlaylist()
1491                 elif not playlistitems:
1492                     break
1493             entries.append(entry)
1494             try:
1495                 if entry is not None:
1496                     self._match_entry(entry, incomplete=True, silent=True)
1497             except (ExistingVideoReached, RejectedVideoReached):
1498                 break
1499         ie_result['entries'] = entries
1500
1501         # Save playlist_index before re-ordering
1502         entries = [
1503             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1504             for i, entry in enumerate(entries, 1)
1505             if entry is not None]
1506         n_entries = len(entries)
1507
1508         if not playlistitems and (playliststart or playlistend):
1509             playlistitems = list(range(playliststart, playliststart + n_entries))
1510         ie_result['requested_entries'] = playlistitems
1511
1512         if self.params.get('allow_playlist_files', True):
1513             ie_copy = {
1514                 'playlist': playlist,
1515                 'playlist_id': ie_result.get('id'),
1516                 'playlist_title': ie_result.get('title'),
1517                 'playlist_uploader': ie_result.get('uploader'),
1518                 'playlist_uploader_id': ie_result.get('uploader_id'),
1519                 'playlist_index': 0,
1520             }
1521             ie_copy.update(dict(ie_result))
1522
1523             if self._write_info_json('playlist', ie_result,
1524                                      self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1525                 return
1526             if self._write_description('playlist', ie_result,
1527                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1528                 return
1529             # TODO: This should be passed to ThumbnailsConvertor if necessary
1530             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1531
1532         if self.params.get('playlistreverse', False):
1533             entries = entries[::-1]
1534         if self.params.get('playlistrandom', False):
1535             random.shuffle(entries)
1536
1537         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1538
1539         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1540         failures = 0
1541         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1542         for i, entry_tuple in enumerate(entries, 1):
1543             playlist_index, entry = entry_tuple
1544             if 'playlist-index' in self.params.get('compat_opts', []):
1545                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1546             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1547             # This __x_forwarded_for_ip thing is a bit ugly but requires
1548             # minimal changes
1549             if x_forwarded_for:
1550                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1551             extra = {
1552                 'n_entries': n_entries,
1553                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1554                 'playlist_index': playlist_index,
1555                 'playlist_autonumber': i,
1556                 'playlist': playlist,
1557                 'playlist_id': ie_result.get('id'),
1558                 'playlist_title': ie_result.get('title'),
1559                 'playlist_uploader': ie_result.get('uploader'),
1560                 'playlist_uploader_id': ie_result.get('uploader_id'),
1561                 'extractor': ie_result['extractor'],
1562                 'webpage_url': ie_result['webpage_url'],
1563                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1564                 'extractor_key': ie_result['extractor_key'],
1565             }
1566
1567             if self._match_entry(entry, incomplete=True) is not None:
1568                 continue
1569
1570             entry_result = self.__process_iterable_entry(entry, download, extra)
1571             if not entry_result:
1572                 failures += 1
1573             if failures >= max_failures:
1574                 self.report_error(
1575                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1576                 break
1577             # TODO: skip failed (empty) entries?
1578             playlist_results.append(entry_result)
1579         ie_result['entries'] = playlist_results
1580         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1581         return ie_result
1582
1583     @__handle_extraction_exceptions
1584     def __process_iterable_entry(self, entry, download, extra_info):
1585         return self.process_ie_result(
1586             entry, download=download, extra_info=extra_info)
1587
1588     def _build_format_filter(self, filter_spec):
1589         " Returns a function to filter the formats according to the filter_spec "
1590
1591         OPERATORS = {
1592             '<': operator.lt,
1593             '<=': operator.le,
1594             '>': operator.gt,
1595             '>=': operator.ge,
1596             '=': operator.eq,
1597             '!=': operator.ne,
1598         }
1599         operator_rex = re.compile(r'''(?x)\s*
1600             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1601             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1602             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1603             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1604         m = operator_rex.fullmatch(filter_spec)
1605         if m:
1606             try:
1607                 comparison_value = int(m.group('value'))
1608             except ValueError:
1609                 comparison_value = parse_filesize(m.group('value'))
1610                 if comparison_value is None:
1611                     comparison_value = parse_filesize(m.group('value') + 'B')
1612                 if comparison_value is None:
1613                     raise ValueError(
1614                         'Invalid value %r in format specification %r' % (
1615                             m.group('value'), filter_spec))
1616             op = OPERATORS[m.group('op')]
1617
1618         if not m:
1619             STR_OPERATORS = {
1620                 '=': operator.eq,
1621                 '^=': lambda attr, value: attr.startswith(value),
1622                 '$=': lambda attr, value: attr.endswith(value),
1623                 '*=': lambda attr, value: value in attr,
1624             }
1625             str_operator_rex = re.compile(r'''(?x)\s*
1626                 (?P<key>[a-zA-Z0-9._-]+)\s*
1627                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1628                 (?P<value>[a-zA-Z0-9._-]+)\s*
1629                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1630             m = str_operator_rex.fullmatch(filter_spec)
1631             if m:
1632                 comparison_value = m.group('value')
1633                 str_op = STR_OPERATORS[m.group('op')]
1634                 if m.group('negation'):
1635                     op = lambda attr, value: not str_op(attr, value)
1636                 else:
1637                     op = str_op
1638
1639         if not m:
1640             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1641
1642         def _filter(f):
1643             actual_value = f.get(m.group('key'))
1644             if actual_value is None:
1645                 return m.group('none_inclusive')
1646             return op(actual_value, comparison_value)
1647         return _filter
1648
1649     def _default_format_spec(self, info_dict, download=True):
1650
1651         def can_merge():
1652             merger = FFmpegMergerPP(self)
1653             return merger.available and merger.can_merge()
1654
1655         prefer_best = (
1656             not self.params.get('simulate')
1657             and download
1658             and (
1659                 not can_merge()
1660                 or info_dict.get('is_live', False)
1661                 or self.outtmpl_dict['default'] == '-'))
1662         compat = (
1663             prefer_best
1664             or self.params.get('allow_multiple_audio_streams', False)
1665             or 'format-spec' in self.params.get('compat_opts', []))
1666
1667         return (
1668             'best/bestvideo+bestaudio' if prefer_best
1669             else 'bestvideo*+bestaudio/best' if not compat
1670             else 'bestvideo+bestaudio/best')
1671
1672     def build_format_selector(self, format_spec):
1673         def syntax_error(note, start):
1674             message = (
1675                 'Invalid format specification: '
1676                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1677             return SyntaxError(message)
1678
1679         PICKFIRST = 'PICKFIRST'
1680         MERGE = 'MERGE'
1681         SINGLE = 'SINGLE'
1682         GROUP = 'GROUP'
1683         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1684
1685         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1686                                   'video': self.params.get('allow_multiple_video_streams', False)}
1687
1688         check_formats = self.params.get('check_formats')
1689
1690         def _parse_filter(tokens):
1691             filter_parts = []
1692             for type, string, start, _, _ in tokens:
1693                 if type == tokenize.OP and string == ']':
1694                     return ''.join(filter_parts)
1695                 else:
1696                     filter_parts.append(string)
1697
1698         def _remove_unused_ops(tokens):
1699             # Remove operators that we don't use and join them with the surrounding strings
1700             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1701             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1702             last_string, last_start, last_end, last_line = None, None, None, None
1703             for type, string, start, end, line in tokens:
1704                 if type == tokenize.OP and string == '[':
1705                     if last_string:
1706                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1707                         last_string = None
1708                     yield type, string, start, end, line
1709                     # everything inside brackets will be handled by _parse_filter
1710                     for type, string, start, end, line in tokens:
1711                         yield type, string, start, end, line
1712                         if type == tokenize.OP and string == ']':
1713                             break
1714                 elif type == tokenize.OP and string in ALLOWED_OPS:
1715                     if last_string:
1716                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1717                         last_string = None
1718                     yield type, string, start, end, line
1719                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1720                     if not last_string:
1721                         last_string = string
1722                         last_start = start
1723                         last_end = end
1724                     else:
1725                         last_string += string
1726             if last_string:
1727                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1728
1729         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1730             selectors = []
1731             current_selector = None
1732             for type, string, start, _, _ in tokens:
1733                 # ENCODING is only defined in python 3.x
1734                 if type == getattr(tokenize, 'ENCODING', None):
1735                     continue
1736                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1737                     current_selector = FormatSelector(SINGLE, string, [])
1738                 elif type == tokenize.OP:
1739                     if string == ')':
1740                         if not inside_group:
1741                             # ')' will be handled by the parentheses group
1742                             tokens.restore_last_token()
1743                         break
1744                     elif inside_merge and string in ['/', ',']:
1745                         tokens.restore_last_token()
1746                         break
1747                     elif inside_choice and string == ',':
1748                         tokens.restore_last_token()
1749                         break
1750                     elif string == ',':
1751                         if not current_selector:
1752                             raise syntax_error('"," must follow a format selector', start)
1753                         selectors.append(current_selector)
1754                         current_selector = None
1755                     elif string == '/':
1756                         if not current_selector:
1757                             raise syntax_error('"/" must follow a format selector', start)
1758                         first_choice = current_selector
1759                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1760                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1761                     elif string == '[':
1762                         if not current_selector:
1763                             current_selector = FormatSelector(SINGLE, 'best', [])
1764                         format_filter = _parse_filter(tokens)
1765                         current_selector.filters.append(format_filter)
1766                     elif string == '(':
1767                         if current_selector:
1768                             raise syntax_error('Unexpected "("', start)
1769                         group = _parse_format_selection(tokens, inside_group=True)
1770                         current_selector = FormatSelector(GROUP, group, [])
1771                     elif string == '+':
1772                         if not current_selector:
1773                             raise syntax_error('Unexpected "+"', start)
1774                         selector_1 = current_selector
1775                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1776                         if not selector_2:
1777                             raise syntax_error('Expected a selector', start)
1778                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1779                     else:
1780                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1781                 elif type == tokenize.ENDMARKER:
1782                     break
1783             if current_selector:
1784                 selectors.append(current_selector)
1785             return selectors
1786
1787         def _merge(formats_pair):
1788             format_1, format_2 = formats_pair
1789
1790             formats_info = []
1791             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1792             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1793
1794             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1795                 get_no_more = {'video': False, 'audio': False}
1796                 for (i, fmt_info) in enumerate(formats_info):
1797                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1798                         formats_info.pop(i)
1799                         continue
1800                     for aud_vid in ['audio', 'video']:
1801                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1802                             if get_no_more[aud_vid]:
1803                                 formats_info.pop(i)
1804                                 break
1805                             get_no_more[aud_vid] = True
1806
1807             if len(formats_info) == 1:
1808                 return formats_info[0]
1809
1810             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1811             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1812
1813             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1814             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1815
1816             output_ext = self.params.get('merge_output_format')
1817             if not output_ext:
1818                 if the_only_video:
1819                     output_ext = the_only_video['ext']
1820                 elif the_only_audio and not video_fmts:
1821                     output_ext = the_only_audio['ext']
1822                 else:
1823                     output_ext = 'mkv'
1824
1825             new_dict = {
1826                 'requested_formats': formats_info,
1827                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1828                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1829                 'ext': output_ext,
1830             }
1831
1832             if the_only_video:
1833                 new_dict.update({
1834                     'width': the_only_video.get('width'),
1835                     'height': the_only_video.get('height'),
1836                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1837                     'fps': the_only_video.get('fps'),
1838                     'vcodec': the_only_video.get('vcodec'),
1839                     'vbr': the_only_video.get('vbr'),
1840                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1841                 })
1842
1843             if the_only_audio:
1844                 new_dict.update({
1845                     'acodec': the_only_audio.get('acodec'),
1846                     'abr': the_only_audio.get('abr'),
1847                 })
1848
1849             return new_dict
1850
1851         def _check_formats(formats):
1852             if not check_formats:
1853                 yield from formats
1854                 return
1855             for f in formats:
1856                 self.to_screen('[info] Testing format %s' % f['format_id'])
1857                 temp_file = tempfile.NamedTemporaryFile(
1858                     suffix='.tmp', delete=False,
1859                     dir=self.get_output_path('temp') or None)
1860                 temp_file.close()
1861                 try:
1862                     success, _ = self.dl(temp_file.name, f, test=True)
1863                 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1864                     success = False
1865                 finally:
1866                     if os.path.exists(temp_file.name):
1867                         try:
1868                             os.remove(temp_file.name)
1869                         except OSError:
1870                             self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1871                 if success:
1872                     yield f
1873                 else:
1874                     self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1875
1876         def _build_selector_function(selector):
1877             if isinstance(selector, list):  # ,
1878                 fs = [_build_selector_function(s) for s in selector]
1879
1880                 def selector_function(ctx):
1881                     for f in fs:
1882                         yield from f(ctx)
1883                 return selector_function
1884
1885             elif selector.type == GROUP:  # ()
1886                 selector_function = _build_selector_function(selector.selector)
1887
1888             elif selector.type == PICKFIRST:  # /
1889                 fs = [_build_selector_function(s) for s in selector.selector]
1890
1891                 def selector_function(ctx):
1892                     for f in fs:
1893                         picked_formats = list(f(ctx))
1894                         if picked_formats:
1895                             return picked_formats
1896                     return []
1897
1898             elif selector.type == MERGE:  # +
1899                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1900
1901                 def selector_function(ctx):
1902                     for pair in itertools.product(
1903                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1904                         yield _merge(pair)
1905
1906             elif selector.type == SINGLE:  # atom
1907                 format_spec = selector.selector or 'best'
1908
1909                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1910                 if format_spec == 'all':
1911                     def selector_function(ctx):
1912                         yield from _check_formats(ctx['formats'])
1913                 elif format_spec == 'mergeall':
1914                     def selector_function(ctx):
1915                         formats = list(_check_formats(ctx['formats']))
1916                         if not formats:
1917                             return
1918                         merged_format = formats[-1]
1919                         for f in formats[-2::-1]:
1920                             merged_format = _merge((merged_format, f))
1921                         yield merged_format
1922
1923                 else:
1924                     format_fallback, format_reverse, format_idx = False, True, 1
1925                     mobj = re.match(
1926                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1927                         format_spec)
1928                     if mobj is not None:
1929                         format_idx = int_or_none(mobj.group('n'), default=1)
1930                         format_reverse = mobj.group('bw')[0] == 'b'
1931                         format_type = (mobj.group('type') or [None])[0]
1932                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1933                         format_modified = mobj.group('mod') is not None
1934
1935                         format_fallback = not format_type and not format_modified  # for b, w
1936                         _filter_f = (
1937                             (lambda f: f.get('%scodec' % format_type) != 'none')
1938                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1939                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1940                             if format_type  # bv, ba, wv, wa
1941                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1942                             if not format_modified  # b, w
1943                             else lambda f: True)  # b*, w*
1944                         filter_f = lambda f: _filter_f(f) and (
1945                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
1946                     else:
1947                         filter_f = ((lambda f: f.get('ext') == format_spec)
1948                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1949                                     else (lambda f: f.get('format_id') == format_spec))  # id
1950
1951                     def selector_function(ctx):
1952                         formats = list(ctx['formats'])
1953                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1954                         if format_fallback and ctx['incomplete_formats'] and not matches:
1955                             # for extractors with incomplete formats (audio only (soundcloud)
1956                             # or video only (imgur)) best/worst will fallback to
1957                             # best/worst {video,audio}-only format
1958                             matches = formats
1959                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
1960                         try:
1961                             yield matches[format_idx - 1]
1962                         except IndexError:
1963                             return
1964
1965             filters = [self._build_format_filter(f) for f in selector.filters]
1966
1967             def final_selector(ctx):
1968                 ctx_copy = copy.deepcopy(ctx)
1969                 for _filter in filters:
1970                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1971                 return selector_function(ctx_copy)
1972             return final_selector
1973
1974         stream = io.BytesIO(format_spec.encode('utf-8'))
1975         try:
1976             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1977         except tokenize.TokenError:
1978             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1979
1980         class TokenIterator(object):
1981             def __init__(self, tokens):
1982                 self.tokens = tokens
1983                 self.counter = 0
1984
1985             def __iter__(self):
1986                 return self
1987
1988             def __next__(self):
1989                 if self.counter >= len(self.tokens):
1990                     raise StopIteration()
1991                 value = self.tokens[self.counter]
1992                 self.counter += 1
1993                 return value
1994
1995             next = __next__
1996
1997             def restore_last_token(self):
1998                 self.counter -= 1
1999
2000         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2001         return _build_selector_function(parsed_selector)
2002
2003     def _calc_headers(self, info_dict):
2004         res = std_headers.copy()
2005
2006         add_headers = info_dict.get('http_headers')
2007         if add_headers:
2008             res.update(add_headers)
2009
2010         cookies = self._calc_cookies(info_dict)
2011         if cookies:
2012             res['Cookie'] = cookies
2013
2014         if 'X-Forwarded-For' not in res:
2015             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2016             if x_forwarded_for_ip:
2017                 res['X-Forwarded-For'] = x_forwarded_for_ip
2018
2019         return res
2020
2021     def _calc_cookies(self, info_dict):
2022         pr = sanitized_Request(info_dict['url'])
2023         self.cookiejar.add_cookie_header(pr)
2024         return pr.get_header('Cookie')
2025
2026     def _sanitize_thumbnails(self, info_dict):
2027         thumbnails = info_dict.get('thumbnails')
2028         if thumbnails is None:
2029             thumbnail = info_dict.get('thumbnail')
2030             if thumbnail:
2031                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2032         if thumbnails:
2033             thumbnails.sort(key=lambda t: (
2034                 t.get('preference') if t.get('preference') is not None else -1,
2035                 t.get('width') if t.get('width') is not None else -1,
2036                 t.get('height') if t.get('height') is not None else -1,
2037                 t.get('id') if t.get('id') is not None else '',
2038                 t.get('url')))
2039
2040             def thumbnail_tester():
2041                 if self.params.get('check_formats'):
2042                     test_all = True
2043                     to_screen = lambda msg: self.to_screen(f'[info] {msg}')
2044                 else:
2045                     test_all = False
2046                     to_screen = self.write_debug
2047
2048                 def test_thumbnail(t):
2049                     if not test_all and not t.get('_test_url'):
2050                         return True
2051                     to_screen('Testing thumbnail %s' % t['id'])
2052                     try:
2053                         self.urlopen(HEADRequest(t['url']))
2054                     except network_exceptions as err:
2055                         to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2056                             t['id'], t['url'], error_to_compat_str(err)))
2057                         return False
2058                     return True
2059
2060                 return test_thumbnail
2061
2062             for i, t in enumerate(thumbnails):
2063                 if t.get('id') is None:
2064                     t['id'] = '%d' % i
2065                 if t.get('width') and t.get('height'):
2066                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
2067                 t['url'] = sanitize_url(t['url'])
2068
2069             if self.params.get('check_formats') is not False:
2070                 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2071             else:
2072                 info_dict['thumbnails'] = thumbnails
2073
2074     def process_video_result(self, info_dict, download=True):
2075         assert info_dict.get('_type', 'video') == 'video'
2076
2077         if 'id' not in info_dict:
2078             raise ExtractorError('Missing "id" field in extractor result')
2079         if 'title' not in info_dict:
2080             raise ExtractorError('Missing "title" field in extractor result',
2081                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2082
2083         def report_force_conversion(field, field_not, conversion):
2084             self.report_warning(
2085                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2086                 % (field, field_not, conversion))
2087
2088         def sanitize_string_field(info, string_field):
2089             field = info.get(string_field)
2090             if field is None or isinstance(field, compat_str):
2091                 return
2092             report_force_conversion(string_field, 'a string', 'string')
2093             info[string_field] = compat_str(field)
2094
2095         def sanitize_numeric_fields(info):
2096             for numeric_field in self._NUMERIC_FIELDS:
2097                 field = info.get(numeric_field)
2098                 if field is None or isinstance(field, compat_numeric_types):
2099                     continue
2100                 report_force_conversion(numeric_field, 'numeric', 'int')
2101                 info[numeric_field] = int_or_none(field)
2102
2103         sanitize_string_field(info_dict, 'id')
2104         sanitize_numeric_fields(info_dict)
2105
2106         if 'playlist' not in info_dict:
2107             # It isn't part of a playlist
2108             info_dict['playlist'] = None
2109             info_dict['playlist_index'] = None
2110
2111         self._sanitize_thumbnails(info_dict)
2112
2113         thumbnail = info_dict.get('thumbnail')
2114         thumbnails = info_dict.get('thumbnails')
2115         if thumbnail:
2116             info_dict['thumbnail'] = sanitize_url(thumbnail)
2117         elif thumbnails:
2118             info_dict['thumbnail'] = thumbnails[-1]['url']
2119
2120         if info_dict.get('display_id') is None and 'id' in info_dict:
2121             info_dict['display_id'] = info_dict['id']
2122
2123         for ts_key, date_key in (
2124                 ('timestamp', 'upload_date'),
2125                 ('release_timestamp', 'release_date'),
2126         ):
2127             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2128                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2129                 # see http://bugs.python.org/issue1646728)
2130                 try:
2131                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2132                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2133                 except (ValueError, OverflowError, OSError):
2134                     pass
2135
2136         live_keys = ('is_live', 'was_live')
2137         live_status = info_dict.get('live_status')
2138         if live_status is None:
2139             for key in live_keys:
2140                 if info_dict.get(key) is False:
2141                     continue
2142                 if info_dict.get(key):
2143                     live_status = key
2144                 break
2145             if all(info_dict.get(key) is False for key in live_keys):
2146                 live_status = 'not_live'
2147         if live_status:
2148             info_dict['live_status'] = live_status
2149             for key in live_keys:
2150                 if info_dict.get(key) is None:
2151                     info_dict[key] = (live_status == key)
2152
2153         # Auto generate title fields corresponding to the *_number fields when missing
2154         # in order to always have clean titles. This is very common for TV series.
2155         for field in ('chapter', 'season', 'episode'):
2156             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2157                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2158
2159         for cc_kind in ('subtitles', 'automatic_captions'):
2160             cc = info_dict.get(cc_kind)
2161             if cc:
2162                 for _, subtitle in cc.items():
2163                     for subtitle_format in subtitle:
2164                         if subtitle_format.get('url'):
2165                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2166                         if subtitle_format.get('ext') is None:
2167                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2168
2169         automatic_captions = info_dict.get('automatic_captions')
2170         subtitles = info_dict.get('subtitles')
2171
2172         info_dict['requested_subtitles'] = self.process_subtitles(
2173             info_dict['id'], subtitles, automatic_captions)
2174
2175         # We now pick which formats have to be downloaded
2176         if info_dict.get('formats') is None:
2177             # There's only one format available
2178             formats = [info_dict]
2179         else:
2180             formats = info_dict['formats']
2181
2182         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2183         if not self.params.get('allow_unplayable_formats'):
2184             formats = [f for f in formats if not f.get('has_drm')]
2185
2186         if not formats:
2187             self.raise_no_formats(info_dict)
2188
2189         def is_wellformed(f):
2190             url = f.get('url')
2191             if not url:
2192                 self.report_warning(
2193                     '"url" field is missing or empty - skipping format, '
2194                     'there is an error in extractor')
2195                 return False
2196             if isinstance(url, bytes):
2197                 sanitize_string_field(f, 'url')
2198             return True
2199
2200         # Filter out malformed formats for better extraction robustness
2201         formats = list(filter(is_wellformed, formats))
2202
2203         formats_dict = {}
2204
2205         # We check that all the formats have the format and format_id fields
2206         for i, format in enumerate(formats):
2207             sanitize_string_field(format, 'format_id')
2208             sanitize_numeric_fields(format)
2209             format['url'] = sanitize_url(format['url'])
2210             if not format.get('format_id'):
2211                 format['format_id'] = compat_str(i)
2212             else:
2213                 # Sanitize format_id from characters used in format selector expression
2214                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2215             format_id = format['format_id']
2216             if format_id not in formats_dict:
2217                 formats_dict[format_id] = []
2218             formats_dict[format_id].append(format)
2219
2220         # Make sure all formats have unique format_id
2221         for format_id, ambiguous_formats in formats_dict.items():
2222             if len(ambiguous_formats) > 1:
2223                 for i, format in enumerate(ambiguous_formats):
2224                     format['format_id'] = '%s-%d' % (format_id, i)
2225
2226         for i, format in enumerate(formats):
2227             if format.get('format') is None:
2228                 format['format'] = '{id} - {res}{note}'.format(
2229                     id=format['format_id'],
2230                     res=self.format_resolution(format),
2231                     note=format_field(format, 'format_note', ' (%s)'),
2232                 )
2233             # Automatically determine file extension if missing
2234             if format.get('ext') is None:
2235                 format['ext'] = determine_ext(format['url']).lower()
2236             # Automatically determine protocol if missing (useful for format
2237             # selection purposes)
2238             if format.get('protocol') is None:
2239                 format['protocol'] = determine_protocol(format)
2240             # Add HTTP headers, so that external programs can use them from the
2241             # json output
2242             full_format_info = info_dict.copy()
2243             full_format_info.update(format)
2244             format['http_headers'] = self._calc_headers(full_format_info)
2245         # Remove private housekeeping stuff
2246         if '__x_forwarded_for_ip' in info_dict:
2247             del info_dict['__x_forwarded_for_ip']
2248
2249         # TODO Central sorting goes here
2250
2251         if not formats or formats[0] is not info_dict:
2252             # only set the 'formats' fields if the original info_dict list them
2253             # otherwise we end up with a circular reference, the first (and unique)
2254             # element in the 'formats' field in info_dict is info_dict itself,
2255             # which can't be exported to json
2256             info_dict['formats'] = formats
2257
2258         info_dict, _ = self.pre_process(info_dict)
2259
2260         if self.params.get('list_thumbnails'):
2261             self.list_thumbnails(info_dict)
2262         if self.params.get('listformats'):
2263             if not info_dict.get('formats') and not info_dict.get('url'):
2264                 self.to_screen('%s has no formats' % info_dict['id'])
2265             else:
2266                 self.list_formats(info_dict)
2267         if self.params.get('listsubtitles'):
2268             if 'automatic_captions' in info_dict:
2269                 self.list_subtitles(
2270                     info_dict['id'], automatic_captions, 'automatic captions')
2271             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2272         list_only = self.params.get('simulate') is None and (
2273             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2274         if list_only:
2275             # Without this printing, -F --print-json will not work
2276             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2277             return
2278
2279         format_selector = self.format_selector
2280         if format_selector is None:
2281             req_format = self._default_format_spec(info_dict, download=download)
2282             self.write_debug('Default format spec: %s' % req_format)
2283             format_selector = self.build_format_selector(req_format)
2284
2285         # While in format selection we may need to have an access to the original
2286         # format set in order to calculate some metrics or do some processing.
2287         # For now we need to be able to guess whether original formats provided
2288         # by extractor are incomplete or not (i.e. whether extractor provides only
2289         # video-only or audio-only formats) for proper formats selection for
2290         # extractors with such incomplete formats (see
2291         # https://github.com/ytdl-org/youtube-dl/pull/5556).
2292         # Since formats may be filtered during format selection and may not match
2293         # the original formats the results may be incorrect. Thus original formats
2294         # or pre-calculated metrics should be passed to format selection routines
2295         # as well.
2296         # We will pass a context object containing all necessary additional data
2297         # instead of just formats.
2298         # This fixes incorrect format selection issue (see
2299         # https://github.com/ytdl-org/youtube-dl/issues/10083).
2300         incomplete_formats = (
2301             # All formats are video-only or
2302             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2303             # all formats are audio-only
2304             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2305
2306         ctx = {
2307             'formats': formats,
2308             'incomplete_formats': incomplete_formats,
2309         }
2310
2311         formats_to_download = list(format_selector(ctx))
2312         if not formats_to_download:
2313             if not self.params.get('ignore_no_formats_error'):
2314                 raise ExtractorError('Requested format is not available', expected=True,
2315                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2316             else:
2317                 self.report_warning('Requested format is not available')
2318                 # Process what we can, even without any available formats.
2319                 self.process_info(dict(info_dict))
2320         elif download:
2321             self.to_screen(
2322                 '[info] %s: Downloading %d format(s): %s' % (
2323                     info_dict['id'], len(formats_to_download),
2324                     ", ".join([f['format_id'] for f in formats_to_download])))
2325             for fmt in formats_to_download:
2326                 new_info = dict(info_dict)
2327                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2328                 new_info['__original_infodict'] = info_dict
2329                 new_info.update(fmt)
2330                 self.process_info(new_info)
2331         # We update the info dict with the best quality format (backwards compatibility)
2332         if formats_to_download:
2333             info_dict.update(formats_to_download[-1])
2334         return info_dict
2335
2336     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2337         """Select the requested subtitles and their format"""
2338         available_subs = {}
2339         if normal_subtitles and self.params.get('writesubtitles'):
2340             available_subs.update(normal_subtitles)
2341         if automatic_captions and self.params.get('writeautomaticsub'):
2342             for lang, cap_info in automatic_captions.items():
2343                 if lang not in available_subs:
2344                     available_subs[lang] = cap_info
2345
2346         if (not self.params.get('writesubtitles') and not
2347                 self.params.get('writeautomaticsub') or not
2348                 available_subs):
2349             return None
2350
2351         all_sub_langs = available_subs.keys()
2352         if self.params.get('allsubtitles', False):
2353             requested_langs = all_sub_langs
2354         elif self.params.get('subtitleslangs', False):
2355             # A list is used so that the order of languages will be the same as
2356             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2357             requested_langs = []
2358             for lang_re in self.params.get('subtitleslangs'):
2359                 if lang_re == 'all':
2360                     requested_langs.extend(all_sub_langs)
2361                     continue
2362                 discard = lang_re[0] == '-'
2363                 if discard:
2364                     lang_re = lang_re[1:]
2365                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2366                 if discard:
2367                     for lang in current_langs:
2368                         while lang in requested_langs:
2369                             requested_langs.remove(lang)
2370                 else:
2371                     requested_langs.extend(current_langs)
2372             requested_langs = orderedSet(requested_langs)
2373         elif 'en' in available_subs:
2374             requested_langs = ['en']
2375         else:
2376             requested_langs = [list(all_sub_langs)[0]]
2377         if requested_langs:
2378             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2379
2380         formats_query = self.params.get('subtitlesformat', 'best')
2381         formats_preference = formats_query.split('/') if formats_query else []
2382         subs = {}
2383         for lang in requested_langs:
2384             formats = available_subs.get(lang)
2385             if formats is None:
2386                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2387                 continue
2388             for ext in formats_preference:
2389                 if ext == 'best':
2390                     f = formats[-1]
2391                     break
2392                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2393                 if matches:
2394                     f = matches[-1]
2395                     break
2396             else:
2397                 f = formats[-1]
2398                 self.report_warning(
2399                     'No subtitle format found matching "%s" for language %s, '
2400                     'using %s' % (formats_query, lang, f['ext']))
2401             subs[lang] = f
2402         return subs
2403
2404     def __forced_printings(self, info_dict, filename, incomplete):
2405         def print_mandatory(field, actual_field=None):
2406             if actual_field is None:
2407                 actual_field = field
2408             if (self.params.get('force%s' % field, False)
2409                     and (not incomplete or info_dict.get(actual_field) is not None)):
2410                 self.to_stdout(info_dict[actual_field])
2411
2412         def print_optional(field):
2413             if (self.params.get('force%s' % field, False)
2414                     and info_dict.get(field) is not None):
2415                 self.to_stdout(info_dict[field])
2416
2417         info_dict = info_dict.copy()
2418         if filename is not None:
2419             info_dict['filename'] = filename
2420         if info_dict.get('requested_formats') is not None:
2421             # For RTMP URLs, also include the playpath
2422             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2423         elif 'url' in info_dict:
2424             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2425
2426         if self.params.get('forceprint') or self.params.get('forcejson'):
2427             self.post_extract(info_dict)
2428         for tmpl in self.params.get('forceprint', []):
2429             if re.match(r'\w+$', tmpl):
2430                 tmpl = '%({})s'.format(tmpl)
2431             tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2432             self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)
2433
2434         print_mandatory('title')
2435         print_mandatory('id')
2436         print_mandatory('url', 'urls')
2437         print_optional('thumbnail')
2438         print_optional('description')
2439         print_optional('filename')
2440         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2441             self.to_stdout(formatSeconds(info_dict['duration']))
2442         print_mandatory('format')
2443
2444         if self.params.get('forcejson'):
2445             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2446
2447     def dl(self, name, info, subtitle=False, test=False):
2448         if not info.get('url'):
2449             self.raise_no_formats(info, True)
2450
2451         if test:
2452             verbose = self.params.get('verbose')
2453             params = {
2454                 'test': True,
2455                 'quiet': not verbose,
2456                 'verbose': verbose,
2457                 'noprogress': not verbose,
2458                 'nopart': True,
2459                 'skip_unavailable_fragments': False,
2460                 'keep_fragments': False,
2461                 'overwrites': True,
2462                 '_no_ytdl_file': True,
2463             }
2464         else:
2465             params = self.params
2466         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2467         if not test:
2468             for ph in self._progress_hooks:
2469                 fd.add_progress_hook(ph)
2470             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2471             self.write_debug('Invoking downloader on "%s"' % urls)
2472         new_info = dict(info)
2473         if new_info.get('http_headers') is None:
2474             new_info['http_headers'] = self._calc_headers(new_info)
2475         return fd.download(name, new_info, subtitle)
2476
2477     def process_info(self, info_dict):
2478         """Process a single resolved IE result."""
2479
2480         assert info_dict.get('_type', 'video') == 'video'
2481
2482         max_downloads = self.params.get('max_downloads')
2483         if max_downloads is not None:
2484             if self._num_downloads >= int(max_downloads):
2485                 raise MaxDownloadsReached()
2486
2487         # TODO: backward compatibility, to be removed
2488         info_dict['fulltitle'] = info_dict['title']
2489
2490         if 'format' not in info_dict and 'ext' in info_dict:
2491             info_dict['format'] = info_dict['ext']
2492
2493         if self._match_entry(info_dict) is not None:
2494             return
2495
2496         self.post_extract(info_dict)
2497         self._num_downloads += 1
2498
2499         # info_dict['_filename'] needs to be set for backward compatibility
2500         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2501         temp_filename = self.prepare_filename(info_dict, 'temp')
2502         files_to_move = {}
2503
2504         # Forced printings
2505         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2506
2507         if self.params.get('simulate'):
2508             if self.params.get('force_write_download_archive', False):
2509                 self.record_download_archive(info_dict)
2510             # Do nothing else if in simulate mode
2511             return
2512
2513         if full_filename is None:
2514             return
2515         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2516             return
2517         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2518             return
2519
2520         if self._write_description('video', info_dict,
2521                                    self.prepare_filename(info_dict, 'description')) is None:
2522             return
2523
2524         sub_files = self._write_subtitles(info_dict, temp_filename)
2525         if sub_files is None:
2526             return
2527         files_to_move.update(dict(sub_files))
2528
2529         thumb_files = self._write_thumbnails(
2530             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2531         if thumb_files is None:
2532             return
2533         files_to_move.update(dict(thumb_files))
2534
2535         infofn = self.prepare_filename(info_dict, 'infojson')
2536         _infojson_written = self._write_info_json('video', info_dict, infofn)
2537         if _infojson_written:
2538             info_dict['__infojson_filename'] = infofn
2539         elif _infojson_written is None:
2540             return
2541
2542         # Note: Annotations are deprecated
2543         annofn = None
2544         if self.params.get('writeannotations', False):
2545             annofn = self.prepare_filename(info_dict, 'annotation')
2546         if annofn:
2547             if not self._ensure_dir_exists(encodeFilename(annofn)):
2548                 return
2549             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2550                 self.to_screen('[info] Video annotations are already present')
2551             elif not info_dict.get('annotations'):
2552                 self.report_warning('There are no annotations to write.')
2553             else:
2554                 try:
2555                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2556                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2557                         annofile.write(info_dict['annotations'])
2558                 except (KeyError, TypeError):
2559                     self.report_warning('There are no annotations to write.')
2560                 except (OSError, IOError):
2561                     self.report_error('Cannot write annotations file: ' + annofn)
2562                     return
2563
2564         # Write internet shortcut files
2565         url_link = webloc_link = desktop_link = False
2566         if self.params.get('writelink', False):
2567             if sys.platform == "darwin":  # macOS.
2568                 webloc_link = True
2569             elif sys.platform.startswith("linux"):
2570                 desktop_link = True
2571             else:  # if sys.platform in ['win32', 'cygwin']:
2572                 url_link = True
2573         if self.params.get('writeurllink', False):
2574             url_link = True
2575         if self.params.get('writewebloclink', False):
2576             webloc_link = True
2577         if self.params.get('writedesktoplink', False):
2578             desktop_link = True
2579
2580         if url_link or webloc_link or desktop_link:
2581             if 'webpage_url' not in info_dict:
2582                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2583                 return
2584             ascii_url = iri_to_uri(info_dict['webpage_url'])
2585
2586         def _write_link_file(extension, template, newline, embed_filename):
2587             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2588             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2589                 self.to_screen('[info] Internet shortcut is already present')
2590             else:
2591                 try:
2592                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2593                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2594                         template_vars = {'url': ascii_url}
2595                         if embed_filename:
2596                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2597                         linkfile.write(template % template_vars)
2598                 except (OSError, IOError):
2599                     self.report_error('Cannot write internet shortcut ' + linkfn)
2600                     return False
2601             return True
2602
2603         if url_link:
2604             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2605                 return
2606         if webloc_link:
2607             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2608                 return
2609         if desktop_link:
2610             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2611                 return
2612
2613         try:
2614             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2615         except PostProcessingError as err:
2616             self.report_error('Preprocessing: %s' % str(err))
2617             return
2618
2619         must_record_download_archive = False
2620         if self.params.get('skip_download', False):
2621             info_dict['filepath'] = temp_filename
2622             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2623             info_dict['__files_to_move'] = files_to_move
2624             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2625         else:
2626             # Download
2627             info_dict.setdefault('__postprocessors', [])
2628             try:
2629
2630                 def existing_file(*filepaths):
2631                     ext = info_dict.get('ext')
2632                     final_ext = self.params.get('final_ext', ext)
2633                     existing_files = []
2634                     for file in orderedSet(filepaths):
2635                         if final_ext != ext:
2636                             converted = replace_extension(file, final_ext, ext)
2637                             if os.path.exists(encodeFilename(converted)):
2638                                 existing_files.append(converted)
2639                         if os.path.exists(encodeFilename(file)):
2640                             existing_files.append(file)
2641
2642                     if not existing_files or self.params.get('overwrites', False):
2643                         for file in orderedSet(existing_files):
2644                             self.report_file_delete(file)
2645                             os.remove(encodeFilename(file))
2646                         return None
2647
2648                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2649                     return existing_files[0]
2650
2651                 success = True
2652                 if info_dict.get('requested_formats') is not None:
2653
2654                     def compatible_formats(formats):
2655                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2656                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2657                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2658                         if len(video_formats) > 2 or len(audio_formats) > 2:
2659                             return False
2660
2661                         # Check extension
2662                         exts = set(format.get('ext') for format in formats)
2663                         COMPATIBLE_EXTS = (
2664                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2665                             set(('webm',)),
2666                         )
2667                         for ext_sets in COMPATIBLE_EXTS:
2668                             if ext_sets.issuperset(exts):
2669                                 return True
2670                         # TODO: Check acodec/vcodec
2671                         return False
2672
2673                     requested_formats = info_dict['requested_formats']
2674                     old_ext = info_dict['ext']
2675                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2676                         info_dict['ext'] = 'mkv'
2677                         self.report_warning(
2678                             'Requested formats are incompatible for merge and will be merged into mkv.')
2679                     new_ext = info_dict['ext']
2680
2681                     def correct_ext(filename, ext=new_ext):
2682                         if filename == '-':
2683                             return filename
2684                         filename_real_ext = os.path.splitext(filename)[1][1:]
2685                         filename_wo_ext = (
2686                             os.path.splitext(filename)[0]
2687                             if filename_real_ext in (old_ext, new_ext)
2688                             else filename)
2689                         return '%s.%s' % (filename_wo_ext, ext)
2690
2691                     # Ensure filename always has a correct extension for successful merge
2692                     full_filename = correct_ext(full_filename)
2693                     temp_filename = correct_ext(temp_filename)
2694                     dl_filename = existing_file(full_filename, temp_filename)
2695                     info_dict['__real_download'] = False
2696
2697                     _protocols = set(determine_protocol(f) for f in requested_formats)
2698                     if len(_protocols) == 1:  # All requested formats have same protocol
2699                         info_dict['protocol'] = _protocols.pop()
2700                     directly_mergable = FFmpegFD.can_merge_formats(info_dict, self.params)
2701                     if dl_filename is not None:
2702                         self.report_file_already_downloaded(dl_filename)
2703                     elif (directly_mergable and get_suitable_downloader(
2704                             info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
2705                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2706                         success, real_download = self.dl(temp_filename, info_dict)
2707                         info_dict['__real_download'] = real_download
2708                     else:
2709                         downloaded = []
2710                         merger = FFmpegMergerPP(self)
2711                         if self.params.get('allow_unplayable_formats'):
2712                             self.report_warning(
2713                                 'You have requested merging of multiple formats '
2714                                 'while also allowing unplayable formats to be downloaded. '
2715                                 'The formats won\'t be merged to prevent data corruption.')
2716                         elif not merger.available:
2717                             self.report_warning(
2718                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2719                                 'The formats won\'t be merged.')
2720
2721                         if temp_filename == '-':
2722                             reason = ('using a downloader other than ffmpeg' if directly_mergable
2723                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2724                                       else 'but ffmpeg is not installed')
2725                             self.report_warning(
2726                                 f'You have requested downloading multiple formats to stdout {reason}. '
2727                                 'The formats will be streamed one after the other')
2728                             fname = temp_filename
2729                         for f in requested_formats:
2730                             new_info = dict(info_dict)
2731                             del new_info['requested_formats']
2732                             new_info.update(f)
2733                             if temp_filename != '-':
2734                                 fname = prepend_extension(
2735                                     correct_ext(temp_filename, new_info['ext']),
2736                                     'f%s' % f['format_id'], new_info['ext'])
2737                                 if not self._ensure_dir_exists(fname):
2738                                     return
2739                                 f['filepath'] = fname
2740                                 downloaded.append(fname)
2741                             partial_success, real_download = self.dl(fname, new_info)
2742                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2743                             success = success and partial_success
2744                         if merger.available and not self.params.get('allow_unplayable_formats'):
2745                             info_dict['__postprocessors'].append(merger)
2746                             info_dict['__files_to_merge'] = downloaded
2747                             # Even if there were no downloads, it is being merged only now
2748                             info_dict['__real_download'] = True
2749                         else:
2750                             for file in downloaded:
2751                                 files_to_move[file] = None
2752                 else:
2753                     # Just a single file
2754                     dl_filename = existing_file(full_filename, temp_filename)
2755                     if dl_filename is None or dl_filename == temp_filename:
2756                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2757                         # So we should try to resume the download
2758                         success, real_download = self.dl(temp_filename, info_dict)
2759                         info_dict['__real_download'] = real_download
2760                     else:
2761                         self.report_file_already_downloaded(dl_filename)
2762
2763                 dl_filename = dl_filename or temp_filename
2764                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2765
2766             except network_exceptions as err:
2767                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2768                 return
2769             except (OSError, IOError) as err:
2770                 raise UnavailableVideoError(err)
2771             except (ContentTooShortError, ) as err:
2772                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2773                 return
2774
2775             if success and full_filename != '-':
2776
2777                 def fixup():
2778                     do_fixup = True
2779                     fixup_policy = self.params.get('fixup')
2780                     vid = info_dict['id']
2781
2782                     if fixup_policy in ('ignore', 'never'):
2783                         return
2784                     elif fixup_policy == 'warn':
2785                         do_fixup = False
2786                     elif fixup_policy != 'force':
2787                         assert fixup_policy in ('detect_or_warn', None)
2788                         if not info_dict.get('__real_download'):
2789                             do_fixup = False
2790
2791                     def ffmpeg_fixup(cndn, msg, cls):
2792                         if not cndn:
2793                             return
2794                         if not do_fixup:
2795                             self.report_warning(f'{vid}: {msg}')
2796                             return
2797                         pp = cls(self)
2798                         if pp.available:
2799                             info_dict['__postprocessors'].append(pp)
2800                         else:
2801                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2802
2803                     stretched_ratio = info_dict.get('stretched_ratio')
2804                     ffmpeg_fixup(
2805                         stretched_ratio not in (1, None),
2806                         f'Non-uniform pixel ratio {stretched_ratio}',
2807                         FFmpegFixupStretchedPP)
2808
2809                     ffmpeg_fixup(
2810                         (info_dict.get('requested_formats') is None
2811                          and info_dict.get('container') == 'm4a_dash'
2812                          and info_dict.get('ext') == 'm4a'),
2813                         'writing DASH m4a. Only some players support this container',
2814                         FFmpegFixupM4aPP)
2815
2816                     downloader = (get_suitable_downloader(info_dict, self.params).__name__
2817                                   if 'protocol' in info_dict else None)
2818                     ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2819                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2820                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2821
2822                 fixup()
2823                 try:
2824                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2825                 except PostProcessingError as err:
2826                     self.report_error('Postprocessing: %s' % str(err))
2827                     return
2828                 try:
2829                     for ph in self._post_hooks:
2830                         ph(info_dict['filepath'])
2831                 except Exception as err:
2832                     self.report_error('post hooks: %s' % str(err))
2833                     return
2834                 must_record_download_archive = True
2835
2836         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2837             self.record_download_archive(info_dict)
2838         max_downloads = self.params.get('max_downloads')
2839         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2840             raise MaxDownloadsReached()
2841
2842     def download(self, url_list):
2843         """Download a given list of URLs."""
2844         outtmpl = self.outtmpl_dict['default']
2845         if (len(url_list) > 1
2846                 and outtmpl != '-'
2847                 and '%' not in outtmpl
2848                 and self.params.get('max_downloads') != 1):
2849             raise SameFileError(outtmpl)
2850
2851         for url in url_list:
2852             try:
2853                 # It also downloads the videos
2854                 res = self.extract_info(
2855                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2856             except UnavailableVideoError:
2857                 self.report_error('unable to download video')
2858             except MaxDownloadsReached:
2859                 self.to_screen('[info] Maximum number of downloads reached')
2860                 raise
2861             except ExistingVideoReached:
2862                 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
2863                 raise
2864             except RejectedVideoReached:
2865                 self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
2866                 raise
2867             else:
2868                 if self.params.get('dump_single_json', False):
2869                     self.post_extract(res)
2870                     self.to_stdout(json.dumps(self.sanitize_info(res)))
2871
2872         return self._download_retcode
2873
2874     def download_with_info_file(self, info_filename):
2875         with contextlib.closing(fileinput.FileInput(
2876                 [info_filename], mode='r',
2877                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2878             # FileInput doesn't have a read method, we can't call json.load
2879             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2880         try:
2881             self.process_ie_result(info, download=True)
2882         except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2883             webpage_url = info.get('webpage_url')
2884             if webpage_url is not None:
2885                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2886                 return self.download([webpage_url])
2887             else:
2888                 raise
2889         return self._download_retcode
2890
2891     @staticmethod
2892     def sanitize_info(info_dict, remove_private_keys=False):
2893         ''' Sanitize the infodict for converting to json '''
2894         if info_dict is None:
2895             return info_dict
2896         info_dict.setdefault('epoch', int(time.time()))
2897         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
2898         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
2899         if remove_private_keys:
2900             remove_keys |= {
2901                 'requested_formats', 'requested_subtitles', 'requested_entries',
2902                 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2903             }
2904             empty_values = (None, {}, [], set(), tuple())
2905             reject = lambda k, v: k not in keep_keys and (
2906                 k.startswith('_') or k in remove_keys or v in empty_values)
2907         else:
2908             reject = lambda k, v: k in remove_keys
2909         filter_fn = lambda obj: (
2910             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2911             else obj if not isinstance(obj, dict)
2912             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2913         return filter_fn(info_dict)
2914
2915     @staticmethod
2916     def filter_requested_info(info_dict, actually_filter=True):
2917         ''' Alias of sanitize_info for backward compatibility '''
2918         return YoutubeDL.sanitize_info(info_dict, actually_filter)
2919
2920     def run_pp(self, pp, infodict):
2921         files_to_delete = []
2922         if '__files_to_move' not in infodict:
2923             infodict['__files_to_move'] = {}
2924         try:
2925             files_to_delete, infodict = pp.run(infodict)
2926         except PostProcessingError as e:
2927             # Must be True and not 'only_download'
2928             if self.params.get('ignoreerrors') is True:
2929                 self.report_error(e)
2930                 return infodict
2931             raise
2932
2933         if not files_to_delete:
2934             return infodict
2935         if self.params.get('keepvideo', False):
2936             for f in files_to_delete:
2937                 infodict['__files_to_move'].setdefault(f, '')
2938         else:
2939             for old_filename in set(files_to_delete):
2940                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2941                 try:
2942                     os.remove(encodeFilename(old_filename))
2943                 except (IOError, OSError):
2944                     self.report_warning('Unable to remove downloaded original file')
2945                 if old_filename in infodict['__files_to_move']:
2946                     del infodict['__files_to_move'][old_filename]
2947         return infodict
2948
2949     @staticmethod
2950     def post_extract(info_dict):
2951         def actual_post_extract(info_dict):
2952             if info_dict.get('_type') in ('playlist', 'multi_video'):
2953                 for video_dict in info_dict.get('entries', {}):
2954                     actual_post_extract(video_dict or {})
2955                 return
2956
2957             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2958             extra = post_extractor().items()
2959             info_dict.update(extra)
2960             info_dict.pop('__post_extractor', None)
2961
2962             original_infodict = info_dict.get('__original_infodict') or {}
2963             original_infodict.update(extra)
2964             original_infodict.pop('__post_extractor', None)
2965
2966         actual_post_extract(info_dict or {})
2967
2968     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2969         info = dict(ie_info)
2970         info['__files_to_move'] = files_to_move or {}
2971         for pp in self._pps[key]:
2972             info = self.run_pp(pp, info)
2973         return info, info.pop('__files_to_move', None)
2974
2975     def post_process(self, filename, ie_info, files_to_move=None):
2976         """Run all the postprocessors on the given file."""
2977         info = dict(ie_info)
2978         info['filepath'] = filename
2979         info['__files_to_move'] = files_to_move or {}
2980
2981         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2982             info = self.run_pp(pp, info)
2983         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2984         del info['__files_to_move']
2985         for pp in self._pps['after_move']:
2986             info = self.run_pp(pp, info)
2987         return info
2988
2989     def _make_archive_id(self, info_dict):
2990         video_id = info_dict.get('id')
2991         if not video_id:
2992             return
2993         # Future-proof against any change in case
2994         # and backwards compatibility with prior versions
2995         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2996         if extractor is None:
2997             url = str_or_none(info_dict.get('url'))
2998             if not url:
2999                 return
3000             # Try to find matching extractor for the URL and take its ie_key
3001             for ie_key, ie in self._ies.items():
3002                 if ie.suitable(url):
3003                     extractor = ie_key
3004                     break
3005             else:
3006                 return
3007         return '%s %s' % (extractor.lower(), video_id)
3008
3009     def in_download_archive(self, info_dict):
3010         fn = self.params.get('download_archive')
3011         if fn is None:
3012             return False
3013
3014         vid_id = self._make_archive_id(info_dict)
3015         if not vid_id:
3016             return False  # Incomplete video information
3017
3018         return vid_id in self.archive
3019
3020     def record_download_archive(self, info_dict):
3021         fn = self.params.get('download_archive')
3022         if fn is None:
3023             return
3024         vid_id = self._make_archive_id(info_dict)
3025         assert vid_id
3026         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3027             archive_file.write(vid_id + '\n')
3028         self.archive.add(vid_id)
3029
3030     @staticmethod
3031     def format_resolution(format, default='unknown'):
3032         if format.get('vcodec') == 'none':
3033             if format.get('acodec') == 'none':
3034                 return 'images'
3035             return 'audio only'
3036         if format.get('resolution') is not None:
3037             return format['resolution']
3038         if format.get('width') and format.get('height'):
3039             res = '%dx%d' % (format['width'], format['height'])
3040         elif format.get('height'):
3041             res = '%sp' % format['height']
3042         elif format.get('width'):
3043             res = '%dx?' % format['width']
3044         else:
3045             res = default
3046         return res
3047
3048     def _format_note(self, fdict):
3049         res = ''
3050         if fdict.get('ext') in ['f4f', 'f4m']:
3051             res += '(unsupported) '
3052         if fdict.get('language'):
3053             if res:
3054                 res += ' '
3055             res += '[%s] ' % fdict['language']
3056         if fdict.get('format_note') is not None:
3057             res += fdict['format_note'] + ' '
3058         if fdict.get('tbr') is not None:
3059             res += '%4dk ' % fdict['tbr']
3060         if fdict.get('container') is not None:
3061             if res:
3062                 res += ', '
3063             res += '%s container' % fdict['container']
3064         if (fdict.get('vcodec') is not None
3065                 and fdict.get('vcodec') != 'none'):
3066             if res:
3067                 res += ', '
3068             res += fdict['vcodec']
3069             if fdict.get('vbr') is not None:
3070                 res += '@'
3071         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3072             res += 'video@'
3073         if fdict.get('vbr') is not None:
3074             res += '%4dk' % fdict['vbr']
3075         if fdict.get('fps') is not None:
3076             if res:
3077                 res += ', '
3078             res += '%sfps' % fdict['fps']
3079         if fdict.get('acodec') is not None:
3080             if res:
3081                 res += ', '
3082             if fdict['acodec'] == 'none':
3083                 res += 'video only'
3084             else:
3085                 res += '%-5s' % fdict['acodec']
3086         elif fdict.get('abr') is not None:
3087             if res:
3088                 res += ', '
3089             res += 'audio'
3090         if fdict.get('abr') is not None:
3091             res += '@%3dk' % fdict['abr']
3092         if fdict.get('asr') is not None:
3093             res += ' (%5dHz)' % fdict['asr']
3094         if fdict.get('filesize') is not None:
3095             if res:
3096                 res += ', '
3097             res += format_bytes(fdict['filesize'])
3098         elif fdict.get('filesize_approx') is not None:
3099             if res:
3100                 res += ', '
3101             res += '~' + format_bytes(fdict['filesize_approx'])
3102         return res
3103
3104     def list_formats(self, info_dict):
3105         formats = info_dict.get('formats', [info_dict])
3106         new_format = (
3107             'list-formats' not in self.params.get('compat_opts', [])
3108             and self.params.get('listformats_table', True) is not False)
3109         if new_format:
3110             table = [
3111                 [
3112                     format_field(f, 'format_id'),
3113                     format_field(f, 'ext'),
3114                     self.format_resolution(f),
3115                     format_field(f, 'fps', '%d'),
3116                     '|',
3117                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3118                     format_field(f, 'tbr', '%4dk'),
3119                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3120                     '|',
3121                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3122                     format_field(f, 'vbr', '%4dk'),
3123                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3124                     format_field(f, 'abr', '%3dk'),
3125                     format_field(f, 'asr', '%5dHz'),
3126                     ', '.join(filter(None, (
3127                         'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3128                         format_field(f, 'language', '[%s]'),
3129                         format_field(f, 'format_note'),
3130                         format_field(f, 'container', ignore=(None, f.get('ext'))),
3131                     ))),
3132                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3133             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
3134                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
3135         else:
3136             table = [
3137                 [
3138                     format_field(f, 'format_id'),
3139                     format_field(f, 'ext'),
3140                     self.format_resolution(f),
3141                     self._format_note(f)]
3142                 for f in formats
3143                 if f.get('preference') is None or f['preference'] >= -1000]
3144             header_line = ['format code', 'extension', 'resolution', 'note']
3145
3146         self.to_screen(
3147             '[info] Available formats for %s:' % info_dict['id'])
3148         self.to_stdout(render_table(
3149             header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3150
3151     def list_thumbnails(self, info_dict):
3152         thumbnails = list(info_dict.get('thumbnails'))
3153         if not thumbnails:
3154             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3155             return
3156
3157         self.to_screen(
3158             '[info] Thumbnails for %s:' % info_dict['id'])
3159         self.to_stdout(render_table(
3160             ['ID', 'width', 'height', 'URL'],
3161             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3162
3163     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3164         if not subtitles:
3165             self.to_screen('%s has no %s' % (video_id, name))
3166             return
3167         self.to_screen(
3168             'Available %s for %s:' % (name, video_id))
3169
3170         def _row(lang, formats):
3171             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3172             if len(set(names)) == 1:
3173                 names = [] if names[0] == 'unknown' else names[:1]
3174             return [lang, ', '.join(names), ', '.join(exts)]
3175
3176         self.to_stdout(render_table(
3177             ['Language', 'Name', 'Formats'],
3178             [_row(lang, formats) for lang, formats in subtitles.items()],
3179             hideEmpty=True))
3180
3181     def urlopen(self, req):
3182         """ Start an HTTP download """
3183         if isinstance(req, compat_basestring):
3184             req = sanitized_Request(req)
3185         return self._opener.open(req, timeout=self._socket_timeout)
3186
3187     def print_debug_header(self):
3188         if not self.params.get('verbose'):
3189             return
3190
3191         stdout_encoding = getattr(
3192             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
3193         encoding_str = (
3194             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3195                 locale.getpreferredencoding(),
3196                 sys.getfilesystemencoding(),
3197                 stdout_encoding,
3198                 self.get_encoding()))
3199         write_string(encoding_str, encoding=None)
3200
3201         source = detect_variant()
3202         self._write_string('[debug] yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
3203         if _LAZY_LOADER:
3204             self._write_string('[debug] Lazy loading extractors enabled\n')
3205         if plugin_extractors or plugin_postprocessors:
3206             self._write_string('[debug] Plugins: %s\n' % [
3207                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3208                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3209         if self.params.get('compat_opts'):
3210             self._write_string(
3211                 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3212         try:
3213             sp = subprocess.Popen(
3214                 ['git', 'rev-parse', '--short', 'HEAD'],
3215                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3216                 cwd=os.path.dirname(os.path.abspath(__file__)))
3217             out, err = process_communicate_or_kill(sp)
3218             out = out.decode().strip()
3219             if re.match('[0-9a-f]+', out):
3220                 self._write_string('[debug] Git HEAD: %s\n' % out)
3221         except Exception:
3222             try:
3223                 sys.exc_clear()
3224             except Exception:
3225                 pass
3226
3227         def python_implementation():
3228             impl_name = platform.python_implementation()
3229             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3230                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3231             return impl_name
3232
3233         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3234             platform.python_version(),
3235             python_implementation(),
3236             platform.architecture()[0],
3237             platform_name()))
3238
3239         exe_versions = FFmpegPostProcessor.get_versions(self)
3240         exe_versions['rtmpdump'] = rtmpdump_version()
3241         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3242         exe_str = ', '.join(
3243             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3244         ) or 'none'
3245         self._write_string('[debug] exe versions: %s\n' % exe_str)
3246
3247         from .downloader.websocket import has_websockets
3248         from .postprocessor.embedthumbnail import has_mutagen
3249         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3250
3251         lib_str = ', '.join(sorted(filter(None, (
3252             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3253             has_websockets and 'websockets',
3254             has_mutagen and 'mutagen',
3255             SQLITE_AVAILABLE and 'sqlite',
3256             KEYRING_AVAILABLE and 'keyring',
3257         )))) or 'none'
3258         self._write_string('[debug] Optional libraries: %s\n' % lib_str)
3259
3260         proxy_map = {}
3261         for handler in self._opener.handlers:
3262             if hasattr(handler, 'proxies'):
3263                 proxy_map.update(handler.proxies)
3264         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
3265
3266         if self.params.get('call_home', False):
3267             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3268             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
3269             return
3270             latest_version = self.urlopen(
3271                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3272             if version_tuple(latest_version) > version_tuple(__version__):
3273                 self.report_warning(
3274                     'You are using an outdated version (newest version: %s)! '
3275                     'See https://yt-dl.org/update if you need help updating.' %
3276                     latest_version)
3277
3278     def _setup_opener(self):
3279         timeout_val = self.params.get('socket_timeout')
3280         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3281
3282         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3283         opts_cookiefile = self.params.get('cookiefile')
3284         opts_proxy = self.params.get('proxy')
3285
3286         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3287
3288         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3289         if opts_proxy is not None:
3290             if opts_proxy == '':
3291                 proxies = {}
3292             else:
3293                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3294         else:
3295             proxies = compat_urllib_request.getproxies()
3296             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3297             if 'http' in proxies and 'https' not in proxies:
3298                 proxies['https'] = proxies['http']
3299         proxy_handler = PerRequestProxyHandler(proxies)
3300
3301         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3302         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3303         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3304         redirect_handler = YoutubeDLRedirectHandler()
3305         data_handler = compat_urllib_request_DataHandler()
3306
3307         # When passing our own FileHandler instance, build_opener won't add the
3308         # default FileHandler and allows us to disable the file protocol, which
3309         # can be used for malicious purposes (see
3310         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3311         file_handler = compat_urllib_request.FileHandler()
3312
3313         def file_open(*args, **kwargs):
3314             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3315         file_handler.file_open = file_open
3316
3317         opener = compat_urllib_request.build_opener(
3318             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3319
3320         # Delete the default user-agent header, which would otherwise apply in
3321         # cases where our custom HTTP handler doesn't come into play
3322         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3323         opener.addheaders = []
3324         self._opener = opener
3325
3326     def encode(self, s):
3327         if isinstance(s, bytes):
3328             return s  # Already encoded
3329
3330         try:
3331             return s.encode(self.get_encoding())
3332         except UnicodeEncodeError as err:
3333             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3334             raise
3335
3336     def get_encoding(self):
3337         encoding = self.params.get('encoding')
3338         if encoding is None:
3339             encoding = preferredencoding()
3340         return encoding
3341
3342     def _write_info_json(self, label, ie_result, infofn):
3343         ''' Write infojson and returns True = written, False = skip, None = error '''
3344         if not self.params.get('writeinfojson'):
3345             return False
3346         elif not infofn:
3347             self.write_debug(f'Skipping writing {label} infojson')
3348             return False
3349         elif not self._ensure_dir_exists(infofn):
3350             return None
3351         elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3352             self.to_screen(f'[info] {label.title()} metadata is already present')
3353         else:
3354             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3355             try:
3356                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3357             except (OSError, IOError):
3358                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3359                 return None
3360         return True
3361
3362     def _write_description(self, label, ie_result, descfn):
3363         ''' Write description and returns True = written, False = skip, None = error '''
3364         if not self.params.get('writedescription'):
3365             return False
3366         elif not descfn:
3367             self.write_debug(f'Skipping writing {label} description')
3368             return False
3369         elif not self._ensure_dir_exists(descfn):
3370             return None
3371         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3372             self.to_screen(f'[info] {label.title()} description is already present')
3373         elif ie_result.get('description') is None:
3374             self.report_warning(f'There\'s no {label} description to write')
3375             return False
3376         else:
3377             try:
3378                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3379                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3380                     descfile.write(ie_result['description'])
3381             except (OSError, IOError):
3382                 self.report_error(f'Cannot write {label} description file {descfn}')
3383                 return None
3384         return True
3385
3386     def _write_subtitles(self, info_dict, filename):
3387         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3388         ret = []
3389         subtitles = info_dict.get('requested_subtitles')
3390         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3391             # subtitles download errors are already managed as troubles in relevant IE
3392             # that way it will silently go on when used with unsupporting IE
3393             return ret
3394
3395         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3396         if not sub_filename_base:
3397             self.to_screen('[info] Skipping writing video subtitles')
3398             return ret
3399         for sub_lang, sub_info in subtitles.items():
3400             sub_format = sub_info['ext']
3401             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3402             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3403             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3404                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3405                 sub_info['filepath'] = sub_filename
3406                 ret.append((sub_filename, sub_filename_final))
3407                 continue
3408
3409             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3410             if sub_info.get('data') is not None:
3411                 try:
3412                     # Use newline='' to prevent conversion of newline characters
3413                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3414                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3415                         subfile.write(sub_info['data'])
3416                     sub_info['filepath'] = sub_filename
3417                     ret.append((sub_filename, sub_filename_final))
3418                     continue
3419                 except (OSError, IOError):
3420                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3421                     return None
3422
3423             try:
3424                 sub_copy = sub_info.copy()
3425                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3426                 self.dl(sub_filename, sub_copy, subtitle=True)
3427                 sub_info['filepath'] = sub_filename
3428                 ret.append((sub_filename, sub_filename_final))
3429             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3430                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3431                 continue
3432             return ret
3433
3434     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3435         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3436         write_all = self.params.get('write_all_thumbnails', False)
3437         thumbnails, ret = [], []
3438         if write_all or self.params.get('writethumbnail', False):
3439             thumbnails = info_dict.get('thumbnails') or []
3440         multiple = write_all and len(thumbnails) > 1
3441
3442         if thumb_filename_base is None:
3443             thumb_filename_base = filename
3444         if thumbnails and not thumb_filename_base:
3445             self.write_debug(f'Skipping writing {label} thumbnail')
3446             return ret
3447
3448         for t in thumbnails[::-1]:
3449             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3450             thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3451             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3452             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3453
3454             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3455                 ret.append((thumb_filename, thumb_filename_final))
3456                 t['filepath'] = thumb_filename
3457                 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
3458             else:
3459                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3460                 try:
3461                     uf = self.urlopen(t['url'])
3462                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3463                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3464                         shutil.copyfileobj(uf, thumbf)
3465                     ret.append((thumb_filename, thumb_filename_final))
3466                     t['filepath'] = thumb_filename
3467                 except network_exceptions as err:
3468                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3469             if ret and not write_all:
3470                 break
3471         return ret