yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from string import ascii_letters
  31
  32 from .compat import (
  33     compat_basestring,
  34     compat_get_terminal_size,
  35     compat_kwargs,
  36     compat_numeric_types,
  37     compat_os_name,
  38     compat_pycrypto_AES,
  39     compat_shlex_quote,
  40     compat_str,
  41     compat_tokenize_tokenize,
  42     compat_urllib_error,
  43     compat_urllib_request,
  44     compat_urllib_request_DataHandler,
  45 )
  46 from .cookies import load_cookies
  47 from .utils import (
  48     age_restricted,
  49     args_to_str,
  50     ContentTooShortError,
  51     date_from_str,
  52     DateRange,
  53     DEFAULT_OUTTMPL,
  54     determine_ext,
  55     determine_protocol,
  56     DOT_DESKTOP_LINK_TEMPLATE,
  57     DOT_URL_LINK_TEMPLATE,
  58     DOT_WEBLOC_LINK_TEMPLATE,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     STR_FORMAT_RE_TMPL,
  71     STR_FORMAT_TYPES,
  72     formatSeconds,
  73     GeoRestrictedError,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     LazyList,
  79     locked_file,
  80     make_dir,
  81     make_HTTPS_handler,
  82     MaxDownloadsReached,
  83     network_exceptions,
  84     orderedSet,
  85     OUTTMPL_TYPES,
  86     PagedList,
  87     parse_filesize,
  88     PerRequestProxyHandler,
  89     platform_name,
  90     PostProcessingError,
  91     preferredencoding,
  92     prepend_extension,
  93     process_communicate_or_kill,
  94     register_socks_protocols,
  95     RejectedVideoReached,
  96     render_table,
  97     replace_extension,
  98     SameFileError,
  99     sanitize_filename,
 100     sanitize_path,
 101     sanitize_url,
 102     sanitized_Request,
 103     std_headers,
 104     str_or_none,
 105     strftime_or_none,
 106     subtitles_filename,
 107     ThrottledDownload,
 108     to_high_limit_path,
 109     traverse_obj,
 110     try_get,
 111     UnavailableVideoError,
 112     url_basename,
 113     variadic,
 114     version_tuple,
 115     write_json_file,
 116     write_string,
 117     YoutubeDLCookieProcessor,
 118     YoutubeDLHandler,
 119     YoutubeDLRedirectHandler,
 120 )
 121 from .cache import Cache
 122 from .extractor import (
 123     gen_extractor_classes,
 124     get_info_extractor,
 125     _LAZY_LOADER,
 126     _PLUGIN_CLASSES
 127 )
 128 from .extractor.openload import PhantomJSwrapper
 129 from .downloader import (
 130     FFmpegFD,
 131     get_suitable_downloader,
 132     shorten_protocol_name
 133 )
 134 from .downloader.rtmp import rtmpdump_version
 135 from .postprocessor import (
 136     get_postprocessor,
 137     FFmpegFixupDurationPP,
 138     FFmpegFixupM3u8PP,
 139     FFmpegFixupM4aPP,
 140     FFmpegFixupStretchedPP,
 141     FFmpegFixupTimestampPP,
 142     FFmpegMergerPP,
 143     FFmpegPostProcessor,
 144     MoveFilesAfterDownloadPP,
 145 )
 146 from .update import detect_variant
 147 from .version import __version__
 148
 149 if compat_os_name == 'nt':
 150     import ctypes
 151
 152
 153 class YoutubeDL(object):
 154     """YoutubeDL class.
 155
 156     YoutubeDL objects are the ones responsible of downloading the
 157     actual video file and writing it to disk if the user has requested
 158     it, among some other tasks. In most cases there should be one per
 159     program. As, given a video URL, the downloader doesn't know how to
 160     extract all the needed information, task that InfoExtractors do, it
 161     has to pass the URL to one of them.
 162
 163     For this, YoutubeDL objects have a method that allows
 164     InfoExtractors to be registered in a given order. When it is passed
 165     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 166     finds that reports being able to handle it. The InfoExtractor extracts
 167     all the information about the video or videos the URL refers to, and
 168     YoutubeDL process the extracted information, possibly using a File
 169     Downloader to download the video.
 170
 171     YoutubeDL objects accept a lot of parameters. In order not to saturate
 172     the object constructor with arguments, it receives a dictionary of
 173     options instead. These options are available through the params
 174     attribute for the InfoExtractors to use. The YoutubeDL also
 175     registers itself as the downloader in charge for the InfoExtractors
 176     that are added to it, so this is a "mutual registration".
 177
 178     Available options:
 179
 180     username:          Username for authentication purposes.
 181     password:          Password for authentication purposes.
 182     videopassword:     Password for accessing a video.
 183     ap_mso:            Adobe Pass multiple-system operator identifier.
 184     ap_username:       Multiple-system operator account username.
 185     ap_password:       Multiple-system operator account password.
 186     usenetrc:          Use netrc for authentication instead.
 187     verbose:           Print additional info to stdout.
 188     quiet:             Do not print messages to stdout.
 189     no_warnings:       Do not print out anything for warnings.
 190     forceprint:        A list of templates to force print
 191     forceurl:          Force printing final URL. (Deprecated)
 192     forcetitle:        Force printing title. (Deprecated)
 193     forceid:           Force printing ID. (Deprecated)
 194     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 195     forcedescription:  Force printing description. (Deprecated)
 196     forcefilename:     Force printing final filename. (Deprecated)
 197     forceduration:     Force printing duration. (Deprecated)
 198     forcejson:         Force printing info_dict as JSON.
 199     dump_single_json:  Force printing the info_dict of the whole playlist
 200                        (or video) as a single JSON line.
 201     force_write_download_archive: Force writing download archive regardless
 202                        of 'skip_download' or 'simulate'.
 203     simulate:          Do not download the video files. If unset (or None),
 204                        simulate only if listsubtitles, listformats or list_thumbnails is used
 205     format:            Video format code. see "FORMAT SELECTION" for more details.
 206     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 207     ignore_no_formats_error: Ignore "No video formats" error. Usefull for
 208                        extracting metadata even if the video is not actually
 209                        available for download (experimental)
 210     format_sort:       How to sort the video formats. see "Sorting Formats"
 211                        for more details.
 212     format_sort_force: Force the given format_sort. see "Sorting Formats"
 213                        for more details.
 214     allow_multiple_video_streams:   Allow multiple video streams to be merged
 215                        into a single file
 216     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 217                        into a single file
 218     check_formats      Whether to test if the formats are downloadable.
 219                        Can be True (check all), False (check none)
 220                        or None (check only if requested by extractor)
 221     paths:             Dictionary of output paths. The allowed keys are 'home'
 222                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 223     outtmpl:           Dictionary of templates for output names. Allowed keys
 224                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 225                        For compatibility with youtube-dl, a single string can also be used
 226     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 227     restrictfilenames: Do not allow "&" and spaces in file names
 228     trim_file_name:    Limit length of filename (extension excluded)
 229     windowsfilenames:  Force the filenames to be windows compatible
 230     ignoreerrors:      Do not stop on download/postprocessing errors.
 231                        Can be 'only_download' to ignore only download errors.
 232                        Default is 'only_download' for CLI, but False for API
 233     skip_playlist_after_errors: Number of allowed failures until the rest of
 234                        the playlist is skipped
 235     force_generic_extractor: Force downloader to use the generic extractor
 236     overwrites:        Overwrite all video and metadata files if True,
 237                        overwrite only non-video files if None
 238                        and don't overwrite any file if False
 239                        For compatibility with youtube-dl,
 240                        "nooverwrites" may also be used instead
 241     playliststart:     Playlist item to start at.
 242     playlistend:       Playlist item to end at.
 243     playlist_items:    Specific indices of playlist to download.
 244     playlistreverse:   Download playlist items in reverse order.
 245     playlistrandom:    Download playlist items in random order.
 246     matchtitle:        Download only matching titles.
 247     rejecttitle:       Reject downloads for matching titles.
 248     logger:            Log messages to a logging.Logger instance.
 249     logtostderr:       Log messages to stderr instead of stdout.
 250     writedescription:  Write the video description to a .description file
 251     writeinfojson:     Write the video description to a .info.json file
 252     clean_infojson:    Remove private fields from the infojson
 253     getcomments:       Extract video comments. This will not be written to disk
 254                        unless writeinfojson is also given
 255     writeannotations:  Write the video annotations to a .annotations.xml file
 256     writethumbnail:    Write the thumbnail image to a file
 257     allow_playlist_files: Whether to write playlists' description, infojson etc
 258                        also to disk when using the 'write*' options
 259     write_all_thumbnails:  Write all thumbnail formats to files
 260     writelink:         Write an internet shortcut file, depending on the
 261                        current platform (.url/.webloc/.desktop)
 262     writeurllink:      Write a Windows internet shortcut file (.url)
 263     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 264     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 265     writesubtitles:    Write the video subtitles to a file
 266     writeautomaticsub: Write the automatically generated subtitles to a file
 267     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 268                        Downloads all the subtitles of the video
 269                        (requires writesubtitles or writeautomaticsub)
 270     listsubtitles:     Lists all available subtitles for the video
 271     subtitlesformat:   The format code for subtitles
 272     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 273                        The list may contain "all" to refer to all the available
 274                        subtitles. The language can be prefixed with a "-" to
 275                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 276     keepvideo:         Keep the video file after post-processing
 277     daterange:         A DateRange object, download only if the upload_date is in the range.
 278     skip_download:     Skip the actual download of the video file
 279     cachedir:          Location of the cache files in the filesystem.
 280                        False to disable filesystem cache.
 281     noplaylist:        Download single video instead of a playlist if in doubt.
 282     age_limit:         An integer representing the user's age in years.
 283                        Unsuitable videos for the given age are skipped.
 284     min_views:         An integer representing the minimum view count the video
 285                        must have in order to not be skipped.
 286                        Videos without view count information are always
 287                        downloaded. None for no limit.
 288     max_views:         An integer representing the maximum view count.
 289                        Videos that are more popular than that are not
 290                        downloaded.
 291                        Videos without view count information are always
 292                        downloaded. None for no limit.
 293     download_archive:  File name of a file where all downloads are recorded.
 294                        Videos already present in the file are not downloaded
 295                        again.
 296     break_on_existing: Stop the download process after attempting to download a
 297                        file that is in the archive.
 298     break_on_reject:   Stop the download process when encountering a video that
 299                        has been filtered out.
 300     cookiefile:        File name where cookies should be read from and dumped to
 301     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 302                        name/path from where cookies are loaded.
 303                        Eg: ('chrome', ) or (vivaldi, 'default')
 304     nocheckcertificate:Do not verify SSL certificates
 305     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 306                        At the moment, this is only supported by YouTube.
 307     proxy:             URL of the proxy server to use
 308     geo_verification_proxy:  URL of the proxy to use for IP address verification
 309                        on geo-restricted sites.
 310     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 311     bidi_workaround:   Work around buggy terminals without bidirectional text
 312                        support, using fridibi
 313     debug_printtraffic:Print out sent and received HTTP traffic
 314     include_ads:       Download ads as well
 315     default_search:    Prepend this string if an input url is not valid.
 316                        'auto' for elaborate guessing
 317     encoding:          Use this encoding instead of the system-specified.
 318     extract_flat:      Do not resolve URLs, return the immediate result.
 319                        Pass in 'in_playlist' to only show this behavior for
 320                        playlist items.
 321     postprocessors:    A list of dictionaries, each with an entry
 322                        * key:  The name of the postprocessor. See
 323                                yt_dlp/postprocessor/__init__.py for a list.
 324                        * when: When to run the postprocessor. Can be one of
 325                                pre_process|before_dl|post_process|after_move.
 326                                Assumed to be 'post_process' if not given
 327     post_hooks:        A list of functions that get called as the final step
 328                        for each video file, after all postprocessors have been
 329                        called. The filename will be passed as the only argument.
 330     progress_hooks:    A list of functions that get called on download
 331                        progress, with a dictionary with the entries
 332                        * status: One of "downloading", "error", or "finished".
 333                                  Check this first and ignore unknown values.
 334                        * info_dict: The extracted info_dict
 335
 336                        If status is one of "downloading", or "finished", the
 337                        following properties may also be present:
 338                        * filename: The final filename (always present)
 339                        * tmpfilename: The filename we're currently writing to
 340                        * downloaded_bytes: Bytes on disk
 341                        * total_bytes: Size of the whole file, None if unknown
 342                        * total_bytes_estimate: Guess of the eventual file size,
 343                                                None if unavailable.
 344                        * elapsed: The number of seconds since download started.
 345                        * eta: The estimated time in seconds, None if unknown
 346                        * speed: The download speed in bytes/second, None if
 347                                 unknown
 348                        * fragment_index: The counter of the currently
 349                                          downloaded video fragment.
 350                        * fragment_count: The number of fragments (= individual
 351                                          files that will be merged)
 352
 353                        Progress hooks are guaranteed to be called at least once
 354                        (with status "finished") if the download is successful.
 355     merge_output_format: Extension to use when merging formats.
 356     final_ext:         Expected final extension; used to detect when the file was
 357                        already downloaded and converted. "merge_output_format" is
 358                        replaced by this extension when given
 359     fixup:             Automatically correct known faults of the file.
 360                        One of:
 361                        - "never": do nothing
 362                        - "warn": only emit a warning
 363                        - "detect_or_warn": check whether we can do anything
 364                                            about it, warn otherwise (default)
 365     source_address:    Client-side IP address to bind to.
 366     call_home:         Boolean, true iff we are allowed to contact the
 367                        yt-dlp servers for debugging. (BROKEN)
 368     sleep_interval_requests: Number of seconds to sleep between requests
 369                        during extraction
 370     sleep_interval:    Number of seconds to sleep before each download when
 371                        used alone or a lower bound of a range for randomized
 372                        sleep before each download (minimum possible number
 373                        of seconds to sleep) when used along with
 374                        max_sleep_interval.
 375     max_sleep_interval:Upper bound of a range for randomized sleep before each
 376                        download (maximum possible number of seconds to sleep).
 377                        Must only be used along with sleep_interval.
 378                        Actual sleep time will be a random float from range
 379                        [sleep_interval; max_sleep_interval].
 380     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 381     listformats:       Print an overview of available video formats and exit.
 382     list_thumbnails:   Print a table of all thumbnails and exit.
 383     match_filter:      A function that gets called with the info_dict of
 384                        every video.
 385                        If it returns a message, the video is ignored.
 386                        If it returns None, the video is downloaded.
 387                        match_filter_func in utils.py is one example for this.
 388     no_color:          Do not emit color codes in output.
 389     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 390                        HTTP header
 391     geo_bypass_country:
 392                        Two-letter ISO 3166-2 country code that will be used for
 393                        explicit geographic restriction bypassing via faking
 394                        X-Forwarded-For HTTP header
 395     geo_bypass_ip_block:
 396                        IP range in CIDR notation that will be used similarly to
 397                        geo_bypass_country
 398
 399     The following options determine which downloader is picked:
 400     external_downloader: A dictionary of protocol keys and the executable of the
 401                        external downloader to use for it. The allowed protocols
 402                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 403                        Set the value to 'native' to use the native downloader
 404     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 405                        or {'m3u8': 'ffmpeg'} instead.
 406                        Use the native HLS downloader instead of ffmpeg/avconv
 407                        if True, otherwise use ffmpeg/avconv if False, otherwise
 408                        use downloader suggested by extractor if None.
 409     compat_opts:       Compatibility options. See "Differences in default behavior".
 410                        The following options do not work when used through the API:
 411                        filename, abort-on-error, multistreams, no-live-chat,
 412                        no-clean-infojson, no-playlist-metafiles, no-keep-subs.
 413                        Refer __init__.py for their implementation
 414
 415     The following parameters are not used by YoutubeDL itself, they are used by
 416     the downloader (see yt_dlp/downloader/common.py):
 417     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 418     max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
 419     xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
 420
 421     The following options are used by the post processors:
 422     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 423                        otherwise prefer ffmpeg. (avconv support is deprecated)
 424     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 425                        to the binary or its containing directory.
 426     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 427                        and a list of additional command-line arguments for the
 428                        postprocessor/executable. The dict can also have "PP+EXE" keys
 429                        which are used when the given exe is used by the given PP.
 430                        Use 'default' as the name for arguments to passed to all PP
 431                        For compatibility with youtube-dl, a single list of args
 432                        can also be used
 433
 434     The following options are used by the extractors:
 435     extractor_retries: Number of times to retry for known errors
 436     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 437     hls_split_discontinuity: Split HLS playlists to different formats at
 438                        discontinuities such as ad breaks (default: False)
 439     extractor_args:    A dictionary of arguments to be passed to the extractors.
 440                        See "EXTRACTOR ARGUMENTS" for details.
 441                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 442     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 443                        If True (default), DASH manifests and related
 444                        data will be downloaded and processed by extractor.
 445                        You can reduce network I/O by disabling it if you don't
 446                        care about DASH. (only for youtube)
 447     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 448                        If True (default), HLS manifests and related
 449                        data will be downloaded and processed by extractor.
 450                        You can reduce network I/O by disabling it if you don't
 451                        care about HLS. (only for youtube)
 452     """
 453
 454     _NUMERIC_FIELDS = set((
 455         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 456         'timestamp', 'upload_year', 'upload_month', 'upload_day',
 457         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 458         'average_rating', 'comment_count', 'age_limit',
 459         'start_time', 'end_time',
 460         'chapter_number', 'season_number', 'episode_number',
 461         'track_number', 'disc_number', 'release_year',
 462         'playlist_index',
 463     ))
 464
 465     params = None
 466     _ies = {}
 467     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 468     _printed_messages = set()
 469     _first_webpage_request = True
 470     _download_retcode = None
 471     _num_downloads = None
 472     _playlist_level = 0
 473     _playlist_urls = set()
 474     _screen_file = None
 475
 476     def __init__(self, params=None, auto_init=True):
 477         """Create a FileDownloader object with the given options."""
 478         if params is None:
 479             params = {}
 480         self._ies = {}
 481         self._ies_instances = {}
 482         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 483         self._printed_messages = set()
 484         self._first_webpage_request = True
 485         self._post_hooks = []
 486         self._progress_hooks = []
 487         self._download_retcode = 0
 488         self._num_downloads = 0
 489         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 490         self._err_file = sys.stderr
 491         self.params = {
 492             # Default parameters
 493             'nocheckcertificate': False,
 494         }
 495         self.params.update(params)
 496         self.cache = Cache(self)
 497
 498         if sys.version_info < (3, 6):
 499             self.report_warning(
 500                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 501
 502         if self.params.get('allow_unplayable_formats'):
 503             self.report_warning(
 504                 'You have asked for unplayable formats to be listed/downloaded. '
 505                 'This is a developer option intended for debugging. '
 506                 'If you experience any issues while using this option, DO NOT open a bug report')
 507
 508         def check_deprecated(param, option, suggestion):
 509             if self.params.get(param) is not None:
 510                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 511                 return True
 512             return False
 513
 514         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 515             if self.params.get('geo_verification_proxy') is None:
 516                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 517
 518         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 519         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 520         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 521
 522         for msg in self.params.get('warnings', []):
 523             self.report_warning(msg)
 524
 525         if self.params.get('overwrites') is None:
 526             self.params.pop('overwrites', None)
 527         elif self.params.get('nooverwrites') is not None:
 528             # nooverwrites was unnecessarily changed to overwrites
 529             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 530             # This ensures compatibility with both keys
 531             self.params['overwrites'] = not self.params['nooverwrites']
 532         else:
 533             self.params['nooverwrites'] = not self.params['overwrites']
 534
 535         if params.get('bidi_workaround', False):
 536             try:
 537                 import pty
 538                 master, slave = pty.openpty()
 539                 width = compat_get_terminal_size().columns
 540                 if width is None:
 541                     width_args = []
 542                 else:
 543                     width_args = ['-w', str(width)]
 544                 sp_kwargs = dict(
 545                     stdin=subprocess.PIPE,
 546                     stdout=slave,
 547                     stderr=self._err_file)
 548                 try:
 549                     self._output_process = subprocess.Popen(
 550                         ['bidiv'] + width_args, **sp_kwargs
 551                     )
 552                 except OSError:
 553                     self._output_process = subprocess.Popen(
 554                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 555                 self._output_channel = os.fdopen(master, 'rb')
 556             except OSError as ose:
 557                 if ose.errno == errno.ENOENT:
 558                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 559                 else:
 560                     raise
 561
 562         if (sys.platform != 'win32'
 563                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 564                 and not params.get('restrictfilenames', False)):
 565             # Unicode filesystem API will throw errors (#1474, #13027)
 566             self.report_warning(
 567                 'Assuming --restrict-filenames since file system encoding '
 568                 'cannot encode all characters. '
 569                 'Set the LC_ALL environment variable to fix this.')
 570             self.params['restrictfilenames'] = True
 571
 572         self.outtmpl_dict = self.parse_outtmpl()
 573
 574         # Creating format selector here allows us to catch syntax errors before the extraction
 575         self.format_selector = (
 576             None if self.params.get('format') is None
 577             else self.build_format_selector(self.params['format']))
 578
 579         self._setup_opener()
 580
 581         """Preload the archive, if any is specified"""
 582         def preload_download_archive(fn):
 583             if fn is None:
 584                 return False
 585             self.write_debug('Loading archive file %r\n' % fn)
 586             try:
 587                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 588                     for line in archive_file:
 589                         self.archive.add(line.strip())
 590             except IOError as ioe:
 591                 if ioe.errno != errno.ENOENT:
 592                     raise
 593                 return False
 594             return True
 595
 596         self.archive = set()
 597         preload_download_archive(self.params.get('download_archive'))
 598
 599         if auto_init:
 600             self.print_debug_header()
 601             self.add_default_info_extractors()
 602
 603         for pp_def_raw in self.params.get('postprocessors', []):
 604             pp_def = dict(pp_def_raw)
 605             when = pp_def.pop('when', 'post_process')
 606             pp_class = get_postprocessor(pp_def.pop('key'))
 607             pp = pp_class(self, **compat_kwargs(pp_def))
 608             self.add_post_processor(pp, when=when)
 609
 610         for ph in self.params.get('post_hooks', []):
 611             self.add_post_hook(ph)
 612
 613         for ph in self.params.get('progress_hooks', []):
 614             self.add_progress_hook(ph)
 615
 616         register_socks_protocols()
 617
 618     def warn_if_short_id(self, argv):
 619         # short YouTube ID starting with dash?
 620         idxs = [
 621             i for i, a in enumerate(argv)
 622             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 623         if idxs:
 624             correct_argv = (
 625                 ['yt-dlp']
 626                 + [a for i, a in enumerate(argv) if i not in idxs]
 627                 + ['--'] + [argv[i] for i in idxs]
 628             )
 629             self.report_warning(
 630                 'Long argument string detected. '
 631                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 632                 args_to_str(correct_argv))
 633
 634     def add_info_extractor(self, ie):
 635         """Add an InfoExtractor object to the end of the list."""
 636         ie_key = ie.ie_key()
 637         self._ies[ie_key] = ie
 638         if not isinstance(ie, type):
 639             self._ies_instances[ie_key] = ie
 640             ie.set_downloader(self)
 641
 642     def _get_info_extractor_class(self, ie_key):
 643         ie = self._ies.get(ie_key)
 644         if ie is None:
 645             ie = get_info_extractor(ie_key)
 646             self.add_info_extractor(ie)
 647         return ie
 648
 649     def get_info_extractor(self, ie_key):
 650         """
 651         Get an instance of an IE with name ie_key, it will try to get one from
 652         the _ies list, if there's no instance it will create a new one and add
 653         it to the extractor list.
 654         """
 655         ie = self._ies_instances.get(ie_key)
 656         if ie is None:
 657             ie = get_info_extractor(ie_key)()
 658             self.add_info_extractor(ie)
 659         return ie
 660
 661     def add_default_info_extractors(self):
 662         """
 663         Add the InfoExtractors returned by gen_extractors to the end of the list
 664         """
 665         for ie in gen_extractor_classes():
 666             self.add_info_extractor(ie)
 667
 668     def add_post_processor(self, pp, when='post_process'):
 669         """Add a PostProcessor object to the end of the chain."""
 670         self._pps[when].append(pp)
 671         pp.set_downloader(self)
 672
 673     def add_post_hook(self, ph):
 674         """Add the post hook"""
 675         self._post_hooks.append(ph)
 676
 677     def add_progress_hook(self, ph):
 678         """Add the progress hook (currently only for the file downloader)"""
 679         self._progress_hooks.append(ph)
 680
 681     def _bidi_workaround(self, message):
 682         if not hasattr(self, '_output_channel'):
 683             return message
 684
 685         assert hasattr(self, '_output_process')
 686         assert isinstance(message, compat_str)
 687         line_count = message.count('\n') + 1
 688         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 689         self._output_process.stdin.flush()
 690         res = ''.join(self._output_channel.readline().decode('utf-8')
 691                       for _ in range(line_count))
 692         return res[:-len('\n')]
 693
 694     def _write_string(self, message, out=None, only_once=False):
 695         if only_once:
 696             if message in self._printed_messages:
 697                 return
 698             self._printed_messages.add(message)
 699         write_string(message, out=out, encoding=self.params.get('encoding'))
 700
 701     def to_stdout(self, message, skip_eol=False, quiet=False):
 702         """Print message to stdout"""
 703         if self.params.get('logger'):
 704             self.params['logger'].debug(message)
 705         elif not quiet or self.params.get('verbose'):
 706             self._write_string(
 707                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 708                 self._err_file if quiet else self._screen_file)
 709
 710     def to_stderr(self, message, only_once=False):
 711         """Print message to stderr"""
 712         assert isinstance(message, compat_str)
 713         if self.params.get('logger'):
 714             self.params['logger'].error(message)
 715         else:
 716             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 717
 718     def to_console_title(self, message):
 719         if not self.params.get('consoletitle', False):
 720             return
 721         if compat_os_name == 'nt':
 722             if ctypes.windll.kernel32.GetConsoleWindow():
 723                 # c_wchar_p() might not be necessary if `message` is
 724                 # already of type unicode()
 725                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 726         elif 'TERM' in os.environ:
 727             self._write_string('\033]0;%s\007' % message, self._screen_file)
 728
 729     def save_console_title(self):
 730         if not self.params.get('consoletitle', False):
 731             return
 732         if self.params.get('simulate'):
 733             return
 734         if compat_os_name != 'nt' and 'TERM' in os.environ:
 735             # Save the title on stack
 736             self._write_string('\033[22;0t', self._screen_file)
 737
 738     def restore_console_title(self):
 739         if not self.params.get('consoletitle', False):
 740             return
 741         if self.params.get('simulate'):
 742             return
 743         if compat_os_name != 'nt' and 'TERM' in os.environ:
 744             # Restore the title from stack
 745             self._write_string('\033[23;0t', self._screen_file)
 746
 747     def __enter__(self):
 748         self.save_console_title()
 749         return self
 750
 751     def __exit__(self, *args):
 752         self.restore_console_title()
 753
 754         if self.params.get('cookiefile') is not None:
 755             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 756
 757     def trouble(self, message=None, tb=None):
 758         """Determine action to take when a download problem appears.
 759
 760         Depending on if the downloader has been configured to ignore
 761         download errors or not, this method may throw an exception or
 762         not when errors are found, after printing the message.
 763
 764         tb, if given, is additional traceback information.
 765         """
 766         if message is not None:
 767             self.to_stderr(message)
 768         if self.params.get('verbose'):
 769             if tb is None:
 770                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 771                     tb = ''
 772                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 773                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 774                     tb += encode_compat_str(traceback.format_exc())
 775                 else:
 776                     tb_data = traceback.format_list(traceback.extract_stack())
 777                     tb = ''.join(tb_data)
 778             if tb:
 779                 self.to_stderr(tb)
 780         if not self.params.get('ignoreerrors'):
 781             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 782                 exc_info = sys.exc_info()[1].exc_info
 783             else:
 784                 exc_info = sys.exc_info()
 785             raise DownloadError(message, exc_info)
 786         self._download_retcode = 1
 787
 788     def to_screen(self, message, skip_eol=False):
 789         """Print message to stdout if not in quiet mode"""
 790         self.to_stdout(
 791             message, skip_eol, quiet=self.params.get('quiet', False))
 792
 793     def report_warning(self, message, only_once=False):
 794         '''
 795         Print the message to stderr, it will be prefixed with 'WARNING:'
 796         If stderr is a tty file the 'WARNING:' will be colored
 797         '''
 798         if self.params.get('logger') is not None:
 799             self.params['logger'].warning(message)
 800         else:
 801             if self.params.get('no_warnings'):
 802                 return
 803             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 804                 _msg_header = '\033[0;33mWARNING:\033[0m'
 805             else:
 806                 _msg_header = 'WARNING:'
 807             warning_message = '%s %s' % (_msg_header, message)
 808             self.to_stderr(warning_message, only_once)
 809
 810     def report_error(self, message, tb=None):
 811         '''
 812         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 813         in red if stderr is a tty file.
 814         '''
 815         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 816             _msg_header = '\033[0;31mERROR:\033[0m'
 817         else:
 818             _msg_header = 'ERROR:'
 819         error_message = '%s %s' % (_msg_header, message)
 820         self.trouble(error_message, tb)
 821
 822     def write_debug(self, message, only_once=False):
 823         '''Log debug message or Print message to stderr'''
 824         if not self.params.get('verbose', False):
 825             return
 826         message = '[debug] %s' % message
 827         if self.params.get('logger'):
 828             self.params['logger'].debug(message)
 829         else:
 830             self.to_stderr(message, only_once)
 831
 832     def report_file_already_downloaded(self, file_name):
 833         """Report file has already been fully downloaded."""
 834         try:
 835             self.to_screen('[download] %s has already been downloaded' % file_name)
 836         except UnicodeEncodeError:
 837             self.to_screen('[download] The file has already been downloaded')
 838
 839     def report_file_delete(self, file_name):
 840         """Report that existing file will be deleted."""
 841         try:
 842             self.to_screen('Deleting existing file %s' % file_name)
 843         except UnicodeEncodeError:
 844             self.to_screen('Deleting existing file')
 845
 846     def raise_no_formats(self, info, forced=False):
 847         has_drm = info.get('__has_drm')
 848         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 849         expected = self.params.get('ignore_no_formats_error')
 850         if forced or not expected:
 851             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 852                                  expected=has_drm or expected)
 853         else:
 854             self.report_warning(msg)
 855
 856     def parse_outtmpl(self):
 857         outtmpl_dict = self.params.get('outtmpl', {})
 858         if not isinstance(outtmpl_dict, dict):
 859             outtmpl_dict = {'default': outtmpl_dict}
 860         outtmpl_dict.update({
 861             k: v for k, v in DEFAULT_OUTTMPL.items()
 862             if outtmpl_dict.get(k) is None})
 863         for key, val in outtmpl_dict.items():
 864             if isinstance(val, bytes):
 865                 self.report_warning(
 866                     'Parameter outtmpl is bytes, but should be a unicode string. '
 867                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 868         return outtmpl_dict
 869
 870     def get_output_path(self, dir_type='', filename=None):
 871         paths = self.params.get('paths', {})
 872         assert isinstance(paths, dict)
 873         path = os.path.join(
 874             expand_path(paths.get('home', '').strip()),
 875             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 876             filename or '')
 877
 878         # Temporary fix for #4787
 879         # 'Treat' all problem characters by passing filename through preferredencoding
 880         # to workaround encoding issues with subprocess on python2 @ Windows
 881         if sys.version_info < (3, 0) and sys.platform == 'win32':
 882             path = encodeFilename(path, True).decode(preferredencoding())
 883         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 884
 885     @staticmethod
 886     def _outtmpl_expandpath(outtmpl):
 887         # expand_path translates '%%' into '%' and '$$' into '$'
 888         # correspondingly that is not what we want since we need to keep
 889         # '%%' intact for template dict substitution step. Working around
 890         # with boundary-alike separator hack.
 891         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 892         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 893
 894         # outtmpl should be expand_path'ed before template dict substitution
 895         # because meta fields may contain env variables we don't want to
 896         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 897         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 898         return expand_path(outtmpl).replace(sep, '')
 899
 900     @staticmethod
 901     def escape_outtmpl(outtmpl):
 902         ''' Escape any remaining strings like %s, %abc% etc. '''
 903         return re.sub(
 904             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 905             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 906             outtmpl)
 907
 908     @classmethod
 909     def validate_outtmpl(cls, outtmpl):
 910         ''' @return None or Exception object '''
 911         outtmpl = re.sub(
 912             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
 913             lambda mobj: f'{mobj.group(0)[:-1]}s',
 914             cls._outtmpl_expandpath(outtmpl))
 915         try:
 916             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
 917             return None
 918         except ValueError as err:
 919             return err
 920
 921     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 922         """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
 923         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
 924
 925         info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
 926         for key in ('__original_infodict', '__postprocessors'):
 927             info_dict.pop(key, None)
 928         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 929             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
 930             if info_dict.get('duration', None) is not None
 931             else None)
 932         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 933         if info_dict.get('resolution') is None:
 934             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
 935
 936         # For fields playlist_index and autonumber convert all occurrences
 937         # of %(field)s to %(field)0Nd for backward compatibility
 938         field_size_compat_map = {
 939             'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
 940             'autonumber': self.params.get('autonumber_size') or 5,
 941         }
 942
 943         TMPL_DICT = {}
 944         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
 945         MATH_FUNCTIONS = {
 946             '+': float.__add__,
 947             '-': float.__sub__,
 948         }
 949         # Field is of the form key1.key2...
 950         # where keys (except first) can be string, int or slice
 951         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
 952         MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
 953         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
 954         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
 955             (?P<negate>-)?
 956             (?P<fields>{field})
 957             (?P<maths>(?:{math_op}{math_field})*)
 958             (?:>(?P<strf_format>.+?))?
 959             (?P<alternate>(?<!\\),[^|)]+)?
 960             (?:\|(?P<default>.*?))?
 961             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
 962
 963         def _traverse_infodict(k):
 964             k = k.split('.')
 965             if k[0] == '':
 966                 k.pop(0)
 967             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
 968
 969         def get_value(mdict):
 970             # Object traversal
 971             value = _traverse_infodict(mdict['fields'])
 972             # Negative
 973             if mdict['negate']:
 974                 value = float_or_none(value)
 975                 if value is not None:
 976                     value *= -1
 977             # Do maths
 978             offset_key = mdict['maths']
 979             if offset_key:
 980                 value = float_or_none(value)
 981                 operator = None
 982                 while offset_key:
 983                     item = re.match(
 984                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
 985                         offset_key).group(0)
 986                     offset_key = offset_key[len(item):]
 987                     if operator is None:
 988                         operator = MATH_FUNCTIONS[item]
 989                         continue
 990                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
 991                     offset = float_or_none(item)
 992                     if offset is None:
 993                         offset = float_or_none(_traverse_infodict(item))
 994                     try:
 995                         value = operator(value, multiplier * offset)
 996                     except (TypeError, ZeroDivisionError):
 997                         return None
 998                     operator = None
 999             # Datetime formatting
1000             if mdict['strf_format']:
1001                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1002
1003             return value
1004
1005         na = self.params.get('outtmpl_na_placeholder', 'NA')
1006
1007         def _dumpjson_default(obj):
1008             if isinstance(obj, (set, LazyList)):
1009                 return list(obj)
1010             raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
1011
1012         def create_key(outer_mobj):
1013             if not outer_mobj.group('has_key'):
1014                 return f'%{outer_mobj.group(0)}'
1015             key = outer_mobj.group('key')
1016             mobj = re.match(INTERNAL_FORMAT_RE, key)
1017             initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
1018             value, default = None, na
1019             while mobj:
1020                 mobj = mobj.groupdict()
1021                 default = mobj['default'] if mobj['default'] is not None else default
1022                 value = get_value(mobj)
1023                 if value is None and mobj['alternate']:
1024                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1025                 else:
1026                     break
1027
1028             fmt = outer_mobj.group('format')
1029             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1030                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1031
1032             value = default if value is None else value
1033
1034             str_fmt = f'{fmt[:-1]}s'
1035             if fmt[-1] == 'l':  # list
1036                 delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
1037                 value, fmt = delim.join(variadic(value)), str_fmt
1038             elif fmt[-1] == 'j':  # json
1039                 value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
1040             elif fmt[-1] == 'q':  # quoted
1041                 value, fmt = compat_shlex_quote(str(value)), str_fmt
1042             elif fmt[-1] == 'B':  # bytes
1043                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1044                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1045             elif fmt[-1] == 'U':  # unicode normalized
1046                 opts = outer_mobj.group('conversion') or ''
1047                 value, fmt = unicodedata.normalize(
1048                     # "+" = compatibility equivalence, "#" = NFD
1049                     'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
1050                     value), str_fmt
1051             elif fmt[-1] == 'c':
1052                 if value:
1053                     value = str(value)[0]
1054                 else:
1055                     fmt = str_fmt
1056             elif fmt[-1] not in 'rs':  # numeric
1057                 value = float_or_none(value)
1058                 if value is None:
1059                     value, fmt = default, 's'
1060
1061             if sanitize:
1062                 if fmt[-1] == 'r':
1063                     # If value is an object, sanitize might convert it to a string
1064                     # So we convert it to repr first
1065                     value, fmt = repr(value), str_fmt
1066                 if fmt[-1] in 'csr':
1067                     value = sanitize(initial_field, value)
1068
1069             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1070             TMPL_DICT[key] = value
1071             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1072
1073         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1074
1075     def _prepare_filename(self, info_dict, tmpl_type='default'):
1076         try:
1077             sanitize = lambda k, v: sanitize_filename(
1078                 compat_str(v),
1079                 restricted=self.params.get('restrictfilenames'),
1080                 is_id=(k == 'id' or k.endswith('_id')))
1081             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
1082             outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
1083             outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
1084             filename = outtmpl % template_dict
1085
1086             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1087             if filename and force_ext is not None:
1088                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1089
1090             # https://github.com/blackjack4494/youtube-dlc/issues/85
1091             trim_file_name = self.params.get('trim_file_name', False)
1092             if trim_file_name:
1093                 fn_groups = filename.rsplit('.')
1094                 ext = fn_groups[-1]
1095                 sub_ext = ''
1096                 if len(fn_groups) > 2:
1097                     sub_ext = fn_groups[-2]
1098                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1099
1100             return filename
1101         except ValueError as err:
1102             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1103             return None
1104
1105     def prepare_filename(self, info_dict, dir_type='', warn=False):
1106         """Generate the output filename."""
1107
1108         filename = self._prepare_filename(info_dict, dir_type or 'default')
1109         if not filename and dir_type not in ('', 'temp'):
1110             return ''
1111
1112         if warn:
1113             if not self.params.get('paths'):
1114                 pass
1115             elif filename == '-':
1116                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1117             elif os.path.isabs(filename):
1118                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1119         if filename == '-' or not filename:
1120             return filename
1121
1122         return self.get_output_path(dir_type, filename)
1123
1124     def _match_entry(self, info_dict, incomplete=False, silent=False):
1125         """ Returns None if the file should be downloaded """
1126
1127         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1128
1129         def check_filter():
1130             if 'title' in info_dict:
1131                 # This can happen when we're just evaluating the playlist
1132                 title = info_dict['title']
1133                 matchtitle = self.params.get('matchtitle', False)
1134                 if matchtitle:
1135                     if not re.search(matchtitle, title, re.IGNORECASE):
1136                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1137                 rejecttitle = self.params.get('rejecttitle', False)
1138                 if rejecttitle:
1139                     if re.search(rejecttitle, title, re.IGNORECASE):
1140                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1141             date = info_dict.get('upload_date')
1142             if date is not None:
1143                 dateRange = self.params.get('daterange', DateRange())
1144                 if date not in dateRange:
1145                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1146             view_count = info_dict.get('view_count')
1147             if view_count is not None:
1148                 min_views = self.params.get('min_views')
1149                 if min_views is not None and view_count < min_views:
1150                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1151                 max_views = self.params.get('max_views')
1152                 if max_views is not None and view_count > max_views:
1153                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1154             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1155                 return 'Skipping "%s" because it is age restricted' % video_title
1156
1157             match_filter = self.params.get('match_filter')
1158             if match_filter is not None:
1159                 try:
1160                     ret = match_filter(info_dict, incomplete=incomplete)
1161                 except TypeError:
1162                     # For backward compatibility
1163                     ret = None if incomplete else match_filter(info_dict)
1164                 if ret is not None:
1165                     return ret
1166             return None
1167
1168         if self.in_download_archive(info_dict):
1169             reason = '%s has already been recorded in the archive' % video_title
1170             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1171         else:
1172             reason = check_filter()
1173             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1174         if reason is not None:
1175             if not silent:
1176                 self.to_screen('[download] ' + reason)
1177             if self.params.get(break_opt, False):
1178                 raise break_err()
1179         return reason
1180
1181     @staticmethod
1182     def add_extra_info(info_dict, extra_info):
1183         '''Set the keys from extra_info in info dict if they are missing'''
1184         for key, value in extra_info.items():
1185             info_dict.setdefault(key, value)
1186
1187     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1188                      process=True, force_generic_extractor=False):
1189         """
1190         Return a list with a dictionary for each video extracted.
1191
1192         Arguments:
1193         url -- URL to extract
1194
1195         Keyword arguments:
1196         download -- whether to download videos during extraction
1197         ie_key -- extractor key hint
1198         extra_info -- dictionary containing the extra values to add to each result
1199         process -- whether to resolve all unresolved references (URLs, playlist items),
1200             must be True for download to work.
1201         force_generic_extractor -- force using the generic extractor
1202         """
1203
1204         if extra_info is None:
1205             extra_info = {}
1206
1207         if not ie_key and force_generic_extractor:
1208             ie_key = 'Generic'
1209
1210         if ie_key:
1211             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1212         else:
1213             ies = self._ies
1214
1215         for ie_key, ie in ies.items():
1216             if not ie.suitable(url):
1217                 continue
1218
1219             if not ie.working():
1220                 self.report_warning('The program functionality for this site has been marked as broken, '
1221                                     'and will probably not work.')
1222
1223             temp_id = ie.get_temp_id(url)
1224             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1225                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1226                                ie_key, temp_id))
1227                 break
1228             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1229         else:
1230             self.report_error('no suitable InfoExtractor for URL %s' % url)
1231
1232     def __handle_extraction_exceptions(func):
1233
1234         def wrapper(self, *args, **kwargs):
1235             try:
1236                 return func(self, *args, **kwargs)
1237             except GeoRestrictedError as e:
1238                 msg = e.msg
1239                 if e.countries:
1240                     msg += '\nThis video is available in %s.' % ', '.join(
1241                         map(ISO3166Utils.short2full, e.countries))
1242                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1243                 self.report_error(msg)
1244             except ExtractorError as e:  # An error we somewhat expected
1245                 self.report_error(compat_str(e), e.format_traceback())
1246             except ThrottledDownload:
1247                 self.to_stderr('\r')
1248                 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1249                 return wrapper(self, *args, **kwargs)
1250             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
1251                 raise
1252             except Exception as e:
1253                 if self.params.get('ignoreerrors'):
1254                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1255                 else:
1256                     raise
1257         return wrapper
1258
1259     @__handle_extraction_exceptions
1260     def __extract_info(self, url, ie, download, extra_info, process):
1261         ie_result = ie.extract(url)
1262         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1263             return
1264         if isinstance(ie_result, list):
1265             # Backwards compatibility: old IE result format
1266             ie_result = {
1267                 '_type': 'compat_list',
1268                 'entries': ie_result,
1269             }
1270         if extra_info.get('original_url'):
1271             ie_result.setdefault('original_url', extra_info['original_url'])
1272         self.add_default_extra_info(ie_result, ie, url)
1273         if process:
1274             return self.process_ie_result(ie_result, download, extra_info)
1275         else:
1276             return ie_result
1277
1278     def add_default_extra_info(self, ie_result, ie, url):
1279         if url is not None:
1280             self.add_extra_info(ie_result, {
1281                 'webpage_url': url,
1282                 'original_url': url,
1283                 'webpage_url_basename': url_basename(url),
1284             })
1285         if ie is not None:
1286             self.add_extra_info(ie_result, {
1287                 'extractor': ie.IE_NAME,
1288                 'extractor_key': ie.ie_key(),
1289             })
1290
1291     def process_ie_result(self, ie_result, download=True, extra_info=None):
1292         """
1293         Take the result of the ie(may be modified) and resolve all unresolved
1294         references (URLs, playlist items).
1295
1296         It will also download the videos if 'download'.
1297         Returns the resolved ie_result.
1298         """
1299         if extra_info is None:
1300             extra_info = {}
1301         result_type = ie_result.get('_type', 'video')
1302
1303         if result_type in ('url', 'url_transparent'):
1304             ie_result['url'] = sanitize_url(ie_result['url'])
1305             if ie_result.get('original_url'):
1306                 extra_info.setdefault('original_url', ie_result['original_url'])
1307
1308             extract_flat = self.params.get('extract_flat', False)
1309             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1310                     or extract_flat is True):
1311                 info_copy = ie_result.copy()
1312                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1313                 if ie and not ie_result.get('id'):
1314                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1315                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1316                 self.add_extra_info(info_copy, extra_info)
1317                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1318                 if self.params.get('force_write_download_archive', False):
1319                     self.record_download_archive(info_copy)
1320                 return ie_result
1321
1322         if result_type == 'video':
1323             self.add_extra_info(ie_result, extra_info)
1324             ie_result = self.process_video_result(ie_result, download=download)
1325             additional_urls = (ie_result or {}).get('additional_urls')
1326             if additional_urls:
1327                 # TODO: Improve MetadataParserPP to allow setting a list
1328                 if isinstance(additional_urls, compat_str):
1329                     additional_urls = [additional_urls]
1330                 self.to_screen(
1331                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1332                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1333                 ie_result['additional_entries'] = [
1334                     self.extract_info(
1335                         url, download, extra_info,
1336                         force_generic_extractor=self.params.get('force_generic_extractor'))
1337                     for url in additional_urls
1338                 ]
1339             return ie_result
1340         elif result_type == 'url':
1341             # We have to add extra_info to the results because it may be
1342             # contained in a playlist
1343             return self.extract_info(
1344                 ie_result['url'], download,
1345                 ie_key=ie_result.get('ie_key'),
1346                 extra_info=extra_info)
1347         elif result_type == 'url_transparent':
1348             # Use the information from the embedding page
1349             info = self.extract_info(
1350                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1351                 extra_info=extra_info, download=False, process=False)
1352
1353             # extract_info may return None when ignoreerrors is enabled and
1354             # extraction failed with an error, don't crash and return early
1355             # in this case
1356             if not info:
1357                 return info
1358
1359             force_properties = dict(
1360                 (k, v) for k, v in ie_result.items() if v is not None)
1361             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1362                 if f in force_properties:
1363                     del force_properties[f]
1364             new_result = info.copy()
1365             new_result.update(force_properties)
1366
1367             # Extracted info may not be a video result (i.e.
1368             # info.get('_type', 'video') != video) but rather an url or
1369             # url_transparent. In such cases outer metadata (from ie_result)
1370             # should be propagated to inner one (info). For this to happen
1371             # _type of info should be overridden with url_transparent. This
1372             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1373             if new_result.get('_type') == 'url':
1374                 new_result['_type'] = 'url_transparent'
1375
1376             return self.process_ie_result(
1377                 new_result, download=download, extra_info=extra_info)
1378         elif result_type in ('playlist', 'multi_video'):
1379             # Protect from infinite recursion due to recursively nested playlists
1380             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1381             webpage_url = ie_result['webpage_url']
1382             if webpage_url in self._playlist_urls:
1383                 self.to_screen(
1384                     '[download] Skipping already downloaded playlist: %s'
1385                     % ie_result.get('title') or ie_result.get('id'))
1386                 return
1387
1388             self._playlist_level += 1
1389             self._playlist_urls.add(webpage_url)
1390             self._sanitize_thumbnails(ie_result)
1391             try:
1392                 return self.__process_playlist(ie_result, download)
1393             finally:
1394                 self._playlist_level -= 1
1395                 if not self._playlist_level:
1396                     self._playlist_urls.clear()
1397         elif result_type == 'compat_list':
1398             self.report_warning(
1399                 'Extractor %s returned a compat_list result. '
1400                 'It needs to be updated.' % ie_result.get('extractor'))
1401
1402             def _fixup(r):
1403                 self.add_extra_info(r, {
1404                     'extractor': ie_result['extractor'],
1405                     'webpage_url': ie_result['webpage_url'],
1406                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1407                     'extractor_key': ie_result['extractor_key'],
1408                 })
1409                 return r
1410             ie_result['entries'] = [
1411                 self.process_ie_result(_fixup(r), download, extra_info)
1412                 for r in ie_result['entries']
1413             ]
1414             return ie_result
1415         else:
1416             raise Exception('Invalid result type: %s' % result_type)
1417
1418     def _ensure_dir_exists(self, path):
1419         return make_dir(path, self.report_error)
1420
1421     def __process_playlist(self, ie_result, download):
1422         # We process each entry in the playlist
1423         playlist = ie_result.get('title') or ie_result.get('id')
1424         self.to_screen('[download] Downloading playlist: %s' % playlist)
1425
1426         if 'entries' not in ie_result:
1427             raise EntryNotInPlaylist()
1428         incomplete_entries = bool(ie_result.get('requested_entries'))
1429         if incomplete_entries:
1430             def fill_missing_entries(entries, indexes):
1431                 ret = [None] * max(*indexes)
1432                 for i, entry in zip(indexes, entries):
1433                     ret[i - 1] = entry
1434                 return ret
1435             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1436
1437         playlist_results = []
1438
1439         playliststart = self.params.get('playliststart', 1)
1440         playlistend = self.params.get('playlistend')
1441         # For backwards compatibility, interpret -1 as whole list
1442         if playlistend == -1:
1443             playlistend = None
1444
1445         playlistitems_str = self.params.get('playlist_items')
1446         playlistitems = None
1447         if playlistitems_str is not None:
1448             def iter_playlistitems(format):
1449                 for string_segment in format.split(','):
1450                     if '-' in string_segment:
1451                         start, end = string_segment.split('-')
1452                         for item in range(int(start), int(end) + 1):
1453                             yield int(item)
1454                     else:
1455                         yield int(string_segment)
1456             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1457
1458         ie_entries = ie_result['entries']
1459         msg = (
1460             'Downloading %d videos' if not isinstance(ie_entries, list)
1461             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1462
1463         if isinstance(ie_entries, list):
1464             def get_entry(i):
1465                 return ie_entries[i - 1]
1466         else:
1467             if not isinstance(ie_entries, PagedList):
1468                 ie_entries = LazyList(ie_entries)
1469
1470             def get_entry(i):
1471                 return YoutubeDL.__handle_extraction_exceptions(
1472                     lambda self, i: ie_entries[i - 1]
1473                 )(self, i)
1474
1475         entries = []
1476         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1477         for i in items:
1478             if i == 0:
1479                 continue
1480             if playlistitems is None and playlistend is not None and playlistend < i:
1481                 break
1482             entry = None
1483             try:
1484                 entry = get_entry(i)
1485                 if entry is None:
1486                     raise EntryNotInPlaylist()
1487             except (IndexError, EntryNotInPlaylist):
1488                 if incomplete_entries:
1489                     raise EntryNotInPlaylist()
1490                 elif not playlistitems:
1491                     break
1492             entries.append(entry)
1493             try:
1494                 if entry is not None:
1495                     self._match_entry(entry, incomplete=True, silent=True)
1496             except (ExistingVideoReached, RejectedVideoReached):
1497                 break
1498         ie_result['entries'] = entries
1499
1500         # Save playlist_index before re-ordering
1501         entries = [
1502             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1503             for i, entry in enumerate(entries, 1)
1504             if entry is not None]
1505         n_entries = len(entries)
1506
1507         if not playlistitems and (playliststart or playlistend):
1508             playlistitems = list(range(playliststart, playliststart + n_entries))
1509         ie_result['requested_entries'] = playlistitems
1510
1511         if self.params.get('allow_playlist_files', True):
1512             ie_copy = {
1513                 'playlist': playlist,
1514                 'playlist_id': ie_result.get('id'),
1515                 'playlist_title': ie_result.get('title'),
1516                 'playlist_uploader': ie_result.get('uploader'),
1517                 'playlist_uploader_id': ie_result.get('uploader_id'),
1518                 'playlist_index': 0,
1519             }
1520             ie_copy.update(dict(ie_result))
1521
1522             if self._write_info_json('playlist', ie_result,
1523                                      self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1524                 return
1525             if self._write_description('playlist', ie_result,
1526                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1527                 return
1528             # TODO: This should be passed to ThumbnailsConvertor if necessary
1529             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1530
1531         if self.params.get('playlistreverse', False):
1532             entries = entries[::-1]
1533         if self.params.get('playlistrandom', False):
1534             random.shuffle(entries)
1535
1536         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1537
1538         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1539         failures = 0
1540         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1541         for i, entry_tuple in enumerate(entries, 1):
1542             playlist_index, entry = entry_tuple
1543             if 'playlist-index' in self.params.get('compat_opts', []):
1544                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1545             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1546             # This __x_forwarded_for_ip thing is a bit ugly but requires
1547             # minimal changes
1548             if x_forwarded_for:
1549                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1550             extra = {
1551                 'n_entries': n_entries,
1552                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1553                 'playlist_index': playlist_index,
1554                 'playlist_autonumber': i,
1555                 'playlist': playlist,
1556                 'playlist_id': ie_result.get('id'),
1557                 'playlist_title': ie_result.get('title'),
1558                 'playlist_uploader': ie_result.get('uploader'),
1559                 'playlist_uploader_id': ie_result.get('uploader_id'),
1560                 'extractor': ie_result['extractor'],
1561                 'webpage_url': ie_result['webpage_url'],
1562                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1563                 'extractor_key': ie_result['extractor_key'],
1564             }
1565
1566             if self._match_entry(entry, incomplete=True) is not None:
1567                 continue
1568
1569             entry_result = self.__process_iterable_entry(entry, download, extra)
1570             if not entry_result:
1571                 failures += 1
1572             if failures >= max_failures:
1573                 self.report_error(
1574                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1575                 break
1576             # TODO: skip failed (empty) entries?
1577             playlist_results.append(entry_result)
1578         ie_result['entries'] = playlist_results
1579         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1580         return ie_result
1581
1582     @__handle_extraction_exceptions
1583     def __process_iterable_entry(self, entry, download, extra_info):
1584         return self.process_ie_result(
1585             entry, download=download, extra_info=extra_info)
1586
1587     def _build_format_filter(self, filter_spec):
1588         " Returns a function to filter the formats according to the filter_spec "
1589
1590         OPERATORS = {
1591             '<': operator.lt,
1592             '<=': operator.le,
1593             '>': operator.gt,
1594             '>=': operator.ge,
1595             '=': operator.eq,
1596             '!=': operator.ne,
1597         }
1598         operator_rex = re.compile(r'''(?x)\s*
1599             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1600             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1601             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1602             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1603         m = operator_rex.fullmatch(filter_spec)
1604         if m:
1605             try:
1606                 comparison_value = int(m.group('value'))
1607             except ValueError:
1608                 comparison_value = parse_filesize(m.group('value'))
1609                 if comparison_value is None:
1610                     comparison_value = parse_filesize(m.group('value') + 'B')
1611                 if comparison_value is None:
1612                     raise ValueError(
1613                         'Invalid value %r in format specification %r' % (
1614                             m.group('value'), filter_spec))
1615             op = OPERATORS[m.group('op')]
1616
1617         if not m:
1618             STR_OPERATORS = {
1619                 '=': operator.eq,
1620                 '^=': lambda attr, value: attr.startswith(value),
1621                 '$=': lambda attr, value: attr.endswith(value),
1622                 '*=': lambda attr, value: value in attr,
1623             }
1624             str_operator_rex = re.compile(r'''(?x)\s*
1625                 (?P<key>[a-zA-Z0-9._-]+)\s*
1626                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1627                 (?P<value>[a-zA-Z0-9._-]+)\s*
1628                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1629             m = str_operator_rex.fullmatch(filter_spec)
1630             if m:
1631                 comparison_value = m.group('value')
1632                 str_op = STR_OPERATORS[m.group('op')]
1633                 if m.group('negation'):
1634                     op = lambda attr, value: not str_op(attr, value)
1635                 else:
1636                     op = str_op
1637
1638         if not m:
1639             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1640
1641         def _filter(f):
1642             actual_value = f.get(m.group('key'))
1643             if actual_value is None:
1644                 return m.group('none_inclusive')
1645             return op(actual_value, comparison_value)
1646         return _filter
1647
1648     def _default_format_spec(self, info_dict, download=True):
1649
1650         def can_merge():
1651             merger = FFmpegMergerPP(self)
1652             return merger.available and merger.can_merge()
1653
1654         prefer_best = (
1655             not self.params.get('simulate')
1656             and download
1657             and (
1658                 not can_merge()
1659                 or info_dict.get('is_live', False)
1660                 or self.outtmpl_dict['default'] == '-'))
1661         compat = (
1662             prefer_best
1663             or self.params.get('allow_multiple_audio_streams', False)
1664             or 'format-spec' in self.params.get('compat_opts', []))
1665
1666         return (
1667             'best/bestvideo+bestaudio' if prefer_best
1668             else 'bestvideo*+bestaudio/best' if not compat
1669             else 'bestvideo+bestaudio/best')
1670
1671     def build_format_selector(self, format_spec):
1672         def syntax_error(note, start):
1673             message = (
1674                 'Invalid format specification: '
1675                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1676             return SyntaxError(message)
1677
1678         PICKFIRST = 'PICKFIRST'
1679         MERGE = 'MERGE'
1680         SINGLE = 'SINGLE'
1681         GROUP = 'GROUP'
1682         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1683
1684         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1685                                   'video': self.params.get('allow_multiple_video_streams', False)}
1686
1687         check_formats = self.params.get('check_formats')
1688
1689         def _parse_filter(tokens):
1690             filter_parts = []
1691             for type, string, start, _, _ in tokens:
1692                 if type == tokenize.OP and string == ']':
1693                     return ''.join(filter_parts)
1694                 else:
1695                     filter_parts.append(string)
1696
1697         def _remove_unused_ops(tokens):
1698             # Remove operators that we don't use and join them with the surrounding strings
1699             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1700             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1701             last_string, last_start, last_end, last_line = None, None, None, None
1702             for type, string, start, end, line in tokens:
1703                 if type == tokenize.OP and string == '[':
1704                     if last_string:
1705                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1706                         last_string = None
1707                     yield type, string, start, end, line
1708                     # everything inside brackets will be handled by _parse_filter
1709                     for type, string, start, end, line in tokens:
1710                         yield type, string, start, end, line
1711                         if type == tokenize.OP and string == ']':
1712                             break
1713                 elif type == tokenize.OP and string in ALLOWED_OPS:
1714                     if last_string:
1715                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1716                         last_string = None
1717                     yield type, string, start, end, line
1718                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1719                     if not last_string:
1720                         last_string = string
1721                         last_start = start
1722                         last_end = end
1723                     else:
1724                         last_string += string
1725             if last_string:
1726                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1727
1728         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1729             selectors = []
1730             current_selector = None
1731             for type, string, start, _, _ in tokens:
1732                 # ENCODING is only defined in python 3.x
1733                 if type == getattr(tokenize, 'ENCODING', None):
1734                     continue
1735                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1736                     current_selector = FormatSelector(SINGLE, string, [])
1737                 elif type == tokenize.OP:
1738                     if string == ')':
1739                         if not inside_group:
1740                             # ')' will be handled by the parentheses group
1741                             tokens.restore_last_token()
1742                         break
1743                     elif inside_merge and string in ['/', ',']:
1744                         tokens.restore_last_token()
1745                         break
1746                     elif inside_choice and string == ',':
1747                         tokens.restore_last_token()
1748                         break
1749                     elif string == ',':
1750                         if not current_selector:
1751                             raise syntax_error('"," must follow a format selector', start)
1752                         selectors.append(current_selector)
1753                         current_selector = None
1754                     elif string == '/':
1755                         if not current_selector:
1756                             raise syntax_error('"/" must follow a format selector', start)
1757                         first_choice = current_selector
1758                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1759                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1760                     elif string == '[':
1761                         if not current_selector:
1762                             current_selector = FormatSelector(SINGLE, 'best', [])
1763                         format_filter = _parse_filter(tokens)
1764                         current_selector.filters.append(format_filter)
1765                     elif string == '(':
1766                         if current_selector:
1767                             raise syntax_error('Unexpected "("', start)
1768                         group = _parse_format_selection(tokens, inside_group=True)
1769                         current_selector = FormatSelector(GROUP, group, [])
1770                     elif string == '+':
1771                         if not current_selector:
1772                             raise syntax_error('Unexpected "+"', start)
1773                         selector_1 = current_selector
1774                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1775                         if not selector_2:
1776                             raise syntax_error('Expected a selector', start)
1777                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1778                     else:
1779                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1780                 elif type == tokenize.ENDMARKER:
1781                     break
1782             if current_selector:
1783                 selectors.append(current_selector)
1784             return selectors
1785
1786         def _merge(formats_pair):
1787             format_1, format_2 = formats_pair
1788
1789             formats_info = []
1790             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1791             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1792
1793             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1794                 get_no_more = {'video': False, 'audio': False}
1795                 for (i, fmt_info) in enumerate(formats_info):
1796                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1797                         formats_info.pop(i)
1798                         continue
1799                     for aud_vid in ['audio', 'video']:
1800                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1801                             if get_no_more[aud_vid]:
1802                                 formats_info.pop(i)
1803                                 break
1804                             get_no_more[aud_vid] = True
1805
1806             if len(formats_info) == 1:
1807                 return formats_info[0]
1808
1809             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1810             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1811
1812             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1813             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1814
1815             output_ext = self.params.get('merge_output_format')
1816             if not output_ext:
1817                 if the_only_video:
1818                     output_ext = the_only_video['ext']
1819                 elif the_only_audio and not video_fmts:
1820                     output_ext = the_only_audio['ext']
1821                 else:
1822                     output_ext = 'mkv'
1823
1824             new_dict = {
1825                 'requested_formats': formats_info,
1826                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1827                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1828                 'ext': output_ext,
1829             }
1830
1831             if the_only_video:
1832                 new_dict.update({
1833                     'width': the_only_video.get('width'),
1834                     'height': the_only_video.get('height'),
1835                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1836                     'fps': the_only_video.get('fps'),
1837                     'vcodec': the_only_video.get('vcodec'),
1838                     'vbr': the_only_video.get('vbr'),
1839                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1840                 })
1841
1842             if the_only_audio:
1843                 new_dict.update({
1844                     'acodec': the_only_audio.get('acodec'),
1845                     'abr': the_only_audio.get('abr'),
1846                 })
1847
1848             return new_dict
1849
1850         def _check_formats(formats):
1851             if not check_formats:
1852                 yield from formats
1853                 return
1854             for f in formats:
1855                 self.to_screen('[info] Testing format %s' % f['format_id'])
1856                 temp_file = tempfile.NamedTemporaryFile(
1857                     suffix='.tmp', delete=False,
1858                     dir=self.get_output_path('temp') or None)
1859                 temp_file.close()
1860                 try:
1861                     success, _ = self.dl(temp_file.name, f, test=True)
1862                 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1863                     success = False
1864                 finally:
1865                     if os.path.exists(temp_file.name):
1866                         try:
1867                             os.remove(temp_file.name)
1868                         except OSError:
1869                             self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1870                 if success:
1871                     yield f
1872                 else:
1873                     self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1874
1875         def _build_selector_function(selector):
1876             if isinstance(selector, list):  # ,
1877                 fs = [_build_selector_function(s) for s in selector]
1878
1879                 def selector_function(ctx):
1880                     for f in fs:
1881                         yield from f(ctx)
1882                 return selector_function
1883
1884             elif selector.type == GROUP:  # ()
1885                 selector_function = _build_selector_function(selector.selector)
1886
1887             elif selector.type == PICKFIRST:  # /
1888                 fs = [_build_selector_function(s) for s in selector.selector]
1889
1890                 def selector_function(ctx):
1891                     for f in fs:
1892                         picked_formats = list(f(ctx))
1893                         if picked_formats:
1894                             return picked_formats
1895                     return []
1896
1897             elif selector.type == MERGE:  # +
1898                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1899
1900                 def selector_function(ctx):
1901                     for pair in itertools.product(
1902                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1903                         yield _merge(pair)
1904
1905             elif selector.type == SINGLE:  # atom
1906                 format_spec = selector.selector or 'best'
1907
1908                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1909                 if format_spec == 'all':
1910                     def selector_function(ctx):
1911                         yield from _check_formats(ctx['formats'])
1912                 elif format_spec == 'mergeall':
1913                     def selector_function(ctx):
1914                         formats = list(_check_formats(ctx['formats']))
1915                         if not formats:
1916                             return
1917                         merged_format = formats[-1]
1918                         for f in formats[-2::-1]:
1919                             merged_format = _merge((merged_format, f))
1920                         yield merged_format
1921
1922                 else:
1923                     format_fallback, format_reverse, format_idx = False, True, 1
1924                     mobj = re.match(
1925                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1926                         format_spec)
1927                     if mobj is not None:
1928                         format_idx = int_or_none(mobj.group('n'), default=1)
1929                         format_reverse = mobj.group('bw')[0] == 'b'
1930                         format_type = (mobj.group('type') or [None])[0]
1931                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1932                         format_modified = mobj.group('mod') is not None
1933
1934                         format_fallback = not format_type and not format_modified  # for b, w
1935                         _filter_f = (
1936                             (lambda f: f.get('%scodec' % format_type) != 'none')
1937                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1938                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1939                             if format_type  # bv, ba, wv, wa
1940                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1941                             if not format_modified  # b, w
1942                             else lambda f: True)  # b*, w*
1943                         filter_f = lambda f: _filter_f(f) and (
1944                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
1945                     else:
1946                         filter_f = ((lambda f: f.get('ext') == format_spec)
1947                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1948                                     else (lambda f: f.get('format_id') == format_spec))  # id
1949
1950                     def selector_function(ctx):
1951                         formats = list(ctx['formats'])
1952                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1953                         if format_fallback and ctx['incomplete_formats'] and not matches:
1954                             # for extractors with incomplete formats (audio only (soundcloud)
1955                             # or video only (imgur)) best/worst will fallback to
1956                             # best/worst {video,audio}-only format
1957                             matches = formats
1958                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
1959                         try:
1960                             yield matches[format_idx - 1]
1961                         except IndexError:
1962                             return
1963
1964             filters = [self._build_format_filter(f) for f in selector.filters]
1965
1966             def final_selector(ctx):
1967                 ctx_copy = copy.deepcopy(ctx)
1968                 for _filter in filters:
1969                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1970                 return selector_function(ctx_copy)
1971             return final_selector
1972
1973         stream = io.BytesIO(format_spec.encode('utf-8'))
1974         try:
1975             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1976         except tokenize.TokenError:
1977             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1978
1979         class TokenIterator(object):
1980             def __init__(self, tokens):
1981                 self.tokens = tokens
1982                 self.counter = 0
1983
1984             def __iter__(self):
1985                 return self
1986
1987             def __next__(self):
1988                 if self.counter >= len(self.tokens):
1989                     raise StopIteration()
1990                 value = self.tokens[self.counter]
1991                 self.counter += 1
1992                 return value
1993
1994             next = __next__
1995
1996             def restore_last_token(self):
1997                 self.counter -= 1
1998
1999         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2000         return _build_selector_function(parsed_selector)
2001
2002     def _calc_headers(self, info_dict):
2003         res = std_headers.copy()
2004
2005         add_headers = info_dict.get('http_headers')
2006         if add_headers:
2007             res.update(add_headers)
2008
2009         cookies = self._calc_cookies(info_dict)
2010         if cookies:
2011             res['Cookie'] = cookies
2012
2013         if 'X-Forwarded-For' not in res:
2014             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2015             if x_forwarded_for_ip:
2016                 res['X-Forwarded-For'] = x_forwarded_for_ip
2017
2018         return res
2019
2020     def _calc_cookies(self, info_dict):
2021         pr = sanitized_Request(info_dict['url'])
2022         self.cookiejar.add_cookie_header(pr)
2023         return pr.get_header('Cookie')
2024
2025     def _sanitize_thumbnails(self, info_dict):
2026         thumbnails = info_dict.get('thumbnails')
2027         if thumbnails is None:
2028             thumbnail = info_dict.get('thumbnail')
2029             if thumbnail:
2030                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2031         if thumbnails:
2032             thumbnails.sort(key=lambda t: (
2033                 t.get('preference') if t.get('preference') is not None else -1,
2034                 t.get('width') if t.get('width') is not None else -1,
2035                 t.get('height') if t.get('height') is not None else -1,
2036                 t.get('id') if t.get('id') is not None else '',
2037                 t.get('url')))
2038
2039             def thumbnail_tester():
2040                 if self.params.get('check_formats'):
2041                     test_all = True
2042                     to_screen = lambda msg: self.to_screen(f'[info] {msg}')
2043                 else:
2044                     test_all = False
2045                     to_screen = self.write_debug
2046
2047                 def test_thumbnail(t):
2048                     if not test_all and not t.get('_test_url'):
2049                         return True
2050                     to_screen('Testing thumbnail %s' % t['id'])
2051                     try:
2052                         self.urlopen(HEADRequest(t['url']))
2053                     except network_exceptions as err:
2054                         to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2055                             t['id'], t['url'], error_to_compat_str(err)))
2056                         return False
2057                     return True
2058
2059                 return test_thumbnail
2060
2061             for i, t in enumerate(thumbnails):
2062                 if t.get('id') is None:
2063                     t['id'] = '%d' % i
2064                 if t.get('width') and t.get('height'):
2065                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
2066                 t['url'] = sanitize_url(t['url'])
2067
2068             if self.params.get('check_formats') is not False:
2069                 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2070             else:
2071                 info_dict['thumbnails'] = thumbnails
2072
2073     def process_video_result(self, info_dict, download=True):
2074         assert info_dict.get('_type', 'video') == 'video'
2075
2076         if 'id' not in info_dict:
2077             raise ExtractorError('Missing "id" field in extractor result')
2078         if 'title' not in info_dict:
2079             raise ExtractorError('Missing "title" field in extractor result',
2080                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2081
2082         def report_force_conversion(field, field_not, conversion):
2083             self.report_warning(
2084                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2085                 % (field, field_not, conversion))
2086
2087         def sanitize_string_field(info, string_field):
2088             field = info.get(string_field)
2089             if field is None or isinstance(field, compat_str):
2090                 return
2091             report_force_conversion(string_field, 'a string', 'string')
2092             info[string_field] = compat_str(field)
2093
2094         def sanitize_numeric_fields(info):
2095             for numeric_field in self._NUMERIC_FIELDS:
2096                 field = info.get(numeric_field)
2097                 if field is None or isinstance(field, compat_numeric_types):
2098                     continue
2099                 report_force_conversion(numeric_field, 'numeric', 'int')
2100                 info[numeric_field] = int_or_none(field)
2101
2102         sanitize_string_field(info_dict, 'id')
2103         sanitize_numeric_fields(info_dict)
2104
2105         if 'playlist' not in info_dict:
2106             # It isn't part of a playlist
2107             info_dict['playlist'] = None
2108             info_dict['playlist_index'] = None
2109
2110         self._sanitize_thumbnails(info_dict)
2111
2112         thumbnail = info_dict.get('thumbnail')
2113         thumbnails = info_dict.get('thumbnails')
2114         if thumbnail:
2115             info_dict['thumbnail'] = sanitize_url(thumbnail)
2116         elif thumbnails:
2117             info_dict['thumbnail'] = thumbnails[-1]['url']
2118
2119         if info_dict.get('display_id') is None and 'id' in info_dict:
2120             info_dict['display_id'] = info_dict['id']
2121
2122         for ts_key, date_key in (
2123                 ('timestamp', 'upload_date'),
2124                 ('release_timestamp', 'release_date'),
2125         ):
2126             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2127                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2128                 # see http://bugs.python.org/issue1646728)
2129                 try:
2130                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2131                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2132                 except (ValueError, OverflowError, OSError):
2133                     pass
2134
2135         live_keys = ('is_live', 'was_live')
2136         live_status = info_dict.get('live_status')
2137         if live_status is None:
2138             for key in live_keys:
2139                 if info_dict.get(key) is False:
2140                     continue
2141                 if info_dict.get(key):
2142                     live_status = key
2143                 break
2144             if all(info_dict.get(key) is False for key in live_keys):
2145                 live_status = 'not_live'
2146         if live_status:
2147             info_dict['live_status'] = live_status
2148             for key in live_keys:
2149                 if info_dict.get(key) is None:
2150                     info_dict[key] = (live_status == key)
2151
2152         # Auto generate title fields corresponding to the *_number fields when missing
2153         # in order to always have clean titles. This is very common for TV series.
2154         for field in ('chapter', 'season', 'episode'):
2155             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2156                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2157
2158         for cc_kind in ('subtitles', 'automatic_captions'):
2159             cc = info_dict.get(cc_kind)
2160             if cc:
2161                 for _, subtitle in cc.items():
2162                     for subtitle_format in subtitle:
2163                         if subtitle_format.get('url'):
2164                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2165                         if subtitle_format.get('ext') is None:
2166                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2167
2168         automatic_captions = info_dict.get('automatic_captions')
2169         subtitles = info_dict.get('subtitles')
2170
2171         info_dict['requested_subtitles'] = self.process_subtitles(
2172             info_dict['id'], subtitles, automatic_captions)
2173
2174         # We now pick which formats have to be downloaded
2175         if info_dict.get('formats') is None:
2176             # There's only one format available
2177             formats = [info_dict]
2178         else:
2179             formats = info_dict['formats']
2180
2181         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2182         if not self.params.get('allow_unplayable_formats'):
2183             formats = [f for f in formats if not f.get('has_drm')]
2184
2185         if not formats:
2186             self.raise_no_formats(info_dict)
2187
2188         def is_wellformed(f):
2189             url = f.get('url')
2190             if not url:
2191                 self.report_warning(
2192                     '"url" field is missing or empty - skipping format, '
2193                     'there is an error in extractor')
2194                 return False
2195             if isinstance(url, bytes):
2196                 sanitize_string_field(f, 'url')
2197             return True
2198
2199         # Filter out malformed formats for better extraction robustness
2200         formats = list(filter(is_wellformed, formats))
2201
2202         formats_dict = {}
2203
2204         # We check that all the formats have the format and format_id fields
2205         for i, format in enumerate(formats):
2206             sanitize_string_field(format, 'format_id')
2207             sanitize_numeric_fields(format)
2208             format['url'] = sanitize_url(format['url'])
2209             if not format.get('format_id'):
2210                 format['format_id'] = compat_str(i)
2211             else:
2212                 # Sanitize format_id from characters used in format selector expression
2213                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2214             format_id = format['format_id']
2215             if format_id not in formats_dict:
2216                 formats_dict[format_id] = []
2217             formats_dict[format_id].append(format)
2218
2219         # Make sure all formats have unique format_id
2220         for format_id, ambiguous_formats in formats_dict.items():
2221             if len(ambiguous_formats) > 1:
2222                 for i, format in enumerate(ambiguous_formats):
2223                     format['format_id'] = '%s-%d' % (format_id, i)
2224
2225         for i, format in enumerate(formats):
2226             if format.get('format') is None:
2227                 format['format'] = '{id} - {res}{note}'.format(
2228                     id=format['format_id'],
2229                     res=self.format_resolution(format),
2230                     note=format_field(format, 'format_note', ' (%s)'),
2231                 )
2232             # Automatically determine file extension if missing
2233             if format.get('ext') is None:
2234                 format['ext'] = determine_ext(format['url']).lower()
2235             # Automatically determine protocol if missing (useful for format
2236             # selection purposes)
2237             if format.get('protocol') is None:
2238                 format['protocol'] = determine_protocol(format)
2239             # Add HTTP headers, so that external programs can use them from the
2240             # json output
2241             full_format_info = info_dict.copy()
2242             full_format_info.update(format)
2243             format['http_headers'] = self._calc_headers(full_format_info)
2244         # Remove private housekeeping stuff
2245         if '__x_forwarded_for_ip' in info_dict:
2246             del info_dict['__x_forwarded_for_ip']
2247
2248         # TODO Central sorting goes here
2249
2250         if not formats or formats[0] is not info_dict:
2251             # only set the 'formats' fields if the original info_dict list them
2252             # otherwise we end up with a circular reference, the first (and unique)
2253             # element in the 'formats' field in info_dict is info_dict itself,
2254             # which can't be exported to json
2255             info_dict['formats'] = formats
2256
2257         info_dict, _ = self.pre_process(info_dict)
2258
2259         if self.params.get('list_thumbnails'):
2260             self.list_thumbnails(info_dict)
2261         if self.params.get('listformats'):
2262             if not info_dict.get('formats') and not info_dict.get('url'):
2263                 self.to_screen('%s has no formats' % info_dict['id'])
2264             else:
2265                 self.list_formats(info_dict)
2266         if self.params.get('listsubtitles'):
2267             if 'automatic_captions' in info_dict:
2268                 self.list_subtitles(
2269                     info_dict['id'], automatic_captions, 'automatic captions')
2270             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2271         list_only = self.params.get('simulate') is None and (
2272             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2273         if list_only:
2274             # Without this printing, -F --print-json will not work
2275             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2276             return
2277
2278         format_selector = self.format_selector
2279         if format_selector is None:
2280             req_format = self._default_format_spec(info_dict, download=download)
2281             self.write_debug('Default format spec: %s' % req_format)
2282             format_selector = self.build_format_selector(req_format)
2283
2284         # While in format selection we may need to have an access to the original
2285         # format set in order to calculate some metrics or do some processing.
2286         # For now we need to be able to guess whether original formats provided
2287         # by extractor are incomplete or not (i.e. whether extractor provides only
2288         # video-only or audio-only formats) for proper formats selection for
2289         # extractors with such incomplete formats (see
2290         # https://github.com/ytdl-org/youtube-dl/pull/5556).
2291         # Since formats may be filtered during format selection and may not match
2292         # the original formats the results may be incorrect. Thus original formats
2293         # or pre-calculated metrics should be passed to format selection routines
2294         # as well.
2295         # We will pass a context object containing all necessary additional data
2296         # instead of just formats.
2297         # This fixes incorrect format selection issue (see
2298         # https://github.com/ytdl-org/youtube-dl/issues/10083).
2299         incomplete_formats = (
2300             # All formats are video-only or
2301             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2302             # all formats are audio-only
2303             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2304
2305         ctx = {
2306             'formats': formats,
2307             'incomplete_formats': incomplete_formats,
2308         }
2309
2310         formats_to_download = list(format_selector(ctx))
2311         if not formats_to_download:
2312             if not self.params.get('ignore_no_formats_error'):
2313                 raise ExtractorError('Requested format is not available', expected=True,
2314                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2315             else:
2316                 self.report_warning('Requested format is not available')
2317                 # Process what we can, even without any available formats.
2318                 self.process_info(dict(info_dict))
2319         elif download:
2320             self.to_screen(
2321                 '[info] %s: Downloading %d format(s): %s' % (
2322                     info_dict['id'], len(formats_to_download),
2323                     ", ".join([f['format_id'] for f in formats_to_download])))
2324             for fmt in formats_to_download:
2325                 new_info = dict(info_dict)
2326                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2327                 new_info['__original_infodict'] = info_dict
2328                 new_info.update(fmt)
2329                 self.process_info(new_info)
2330         # We update the info dict with the best quality format (backwards compatibility)
2331         if formats_to_download:
2332             info_dict.update(formats_to_download[-1])
2333         return info_dict
2334
2335     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2336         """Select the requested subtitles and their format"""
2337         available_subs = {}
2338         if normal_subtitles and self.params.get('writesubtitles'):
2339             available_subs.update(normal_subtitles)
2340         if automatic_captions and self.params.get('writeautomaticsub'):
2341             for lang, cap_info in automatic_captions.items():
2342                 if lang not in available_subs:
2343                     available_subs[lang] = cap_info
2344
2345         if (not self.params.get('writesubtitles') and not
2346                 self.params.get('writeautomaticsub') or not
2347                 available_subs):
2348             return None
2349
2350         all_sub_langs = available_subs.keys()
2351         if self.params.get('allsubtitles', False):
2352             requested_langs = all_sub_langs
2353         elif self.params.get('subtitleslangs', False):
2354             # A list is used so that the order of languages will be the same as
2355             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2356             requested_langs = []
2357             for lang_re in self.params.get('subtitleslangs'):
2358                 if lang_re == 'all':
2359                     requested_langs.extend(all_sub_langs)
2360                     continue
2361                 discard = lang_re[0] == '-'
2362                 if discard:
2363                     lang_re = lang_re[1:]
2364                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2365                 if discard:
2366                     for lang in current_langs:
2367                         while lang in requested_langs:
2368                             requested_langs.remove(lang)
2369                 else:
2370                     requested_langs.extend(current_langs)
2371             requested_langs = orderedSet(requested_langs)
2372         elif 'en' in available_subs:
2373             requested_langs = ['en']
2374         else:
2375             requested_langs = [list(all_sub_langs)[0]]
2376         if requested_langs:
2377             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2378
2379         formats_query = self.params.get('subtitlesformat', 'best')
2380         formats_preference = formats_query.split('/') if formats_query else []
2381         subs = {}
2382         for lang in requested_langs:
2383             formats = available_subs.get(lang)
2384             if formats is None:
2385                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2386                 continue
2387             for ext in formats_preference:
2388                 if ext == 'best':
2389                     f = formats[-1]
2390                     break
2391                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2392                 if matches:
2393                     f = matches[-1]
2394                     break
2395             else:
2396                 f = formats[-1]
2397                 self.report_warning(
2398                     'No subtitle format found matching "%s" for language %s, '
2399                     'using %s' % (formats_query, lang, f['ext']))
2400             subs[lang] = f
2401         return subs
2402
2403     def __forced_printings(self, info_dict, filename, incomplete):
2404         def print_mandatory(field, actual_field=None):
2405             if actual_field is None:
2406                 actual_field = field
2407             if (self.params.get('force%s' % field, False)
2408                     and (not incomplete or info_dict.get(actual_field) is not None)):
2409                 self.to_stdout(info_dict[actual_field])
2410
2411         def print_optional(field):
2412             if (self.params.get('force%s' % field, False)
2413                     and info_dict.get(field) is not None):
2414                 self.to_stdout(info_dict[field])
2415
2416         info_dict = info_dict.copy()
2417         if filename is not None:
2418             info_dict['filename'] = filename
2419         if info_dict.get('requested_formats') is not None:
2420             # For RTMP URLs, also include the playpath
2421             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2422         elif 'url' in info_dict:
2423             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2424
2425         if self.params.get('forceprint') or self.params.get('forcejson'):
2426             self.post_extract(info_dict)
2427         for tmpl in self.params.get('forceprint', []):
2428             if re.match(r'\w+$', tmpl):
2429                 tmpl = '%({})s'.format(tmpl)
2430             tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2431             self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)
2432
2433         print_mandatory('title')
2434         print_mandatory('id')
2435         print_mandatory('url', 'urls')
2436         print_optional('thumbnail')
2437         print_optional('description')
2438         print_optional('filename')
2439         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2440             self.to_stdout(formatSeconds(info_dict['duration']))
2441         print_mandatory('format')
2442
2443         if self.params.get('forcejson'):
2444             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2445
2446     def dl(self, name, info, subtitle=False, test=False):
2447         if not info.get('url'):
2448             self.raise_no_formats(info, True)
2449
2450         if test:
2451             verbose = self.params.get('verbose')
2452             params = {
2453                 'test': True,
2454                 'quiet': not verbose,
2455                 'verbose': verbose,
2456                 'noprogress': not verbose,
2457                 'nopart': True,
2458                 'skip_unavailable_fragments': False,
2459                 'keep_fragments': False,
2460                 'overwrites': True,
2461                 '_no_ytdl_file': True,
2462             }
2463         else:
2464             params = self.params
2465         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2466         if not test:
2467             for ph in self._progress_hooks:
2468                 fd.add_progress_hook(ph)
2469             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2470             self.write_debug('Invoking downloader on "%s"' % urls)
2471         new_info = dict(info)
2472         if new_info.get('http_headers') is None:
2473             new_info['http_headers'] = self._calc_headers(new_info)
2474         return fd.download(name, new_info, subtitle)
2475
2476     def process_info(self, info_dict):
2477         """Process a single resolved IE result."""
2478
2479         assert info_dict.get('_type', 'video') == 'video'
2480
2481         max_downloads = self.params.get('max_downloads')
2482         if max_downloads is not None:
2483             if self._num_downloads >= int(max_downloads):
2484                 raise MaxDownloadsReached()
2485
2486         # TODO: backward compatibility, to be removed
2487         info_dict['fulltitle'] = info_dict['title']
2488
2489         if 'format' not in info_dict and 'ext' in info_dict:
2490             info_dict['format'] = info_dict['ext']
2491
2492         if self._match_entry(info_dict) is not None:
2493             return
2494
2495         self.post_extract(info_dict)
2496         self._num_downloads += 1
2497
2498         # info_dict['_filename'] needs to be set for backward compatibility
2499         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2500         temp_filename = self.prepare_filename(info_dict, 'temp')
2501         files_to_move = {}
2502
2503         # Forced printings
2504         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2505
2506         if self.params.get('simulate'):
2507             if self.params.get('force_write_download_archive', False):
2508                 self.record_download_archive(info_dict)
2509             # Do nothing else if in simulate mode
2510             return
2511
2512         if full_filename is None:
2513             return
2514         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2515             return
2516         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2517             return
2518
2519         if self._write_description('video', info_dict,
2520                                    self.prepare_filename(info_dict, 'description')) is None:
2521             return
2522
2523         sub_files = self._write_subtitles(info_dict, temp_filename)
2524         if sub_files is None:
2525             return
2526         files_to_move.update(dict(sub_files))
2527
2528         thumb_files = self._write_thumbnails(
2529             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2530         if thumb_files is None:
2531             return
2532         files_to_move.update(dict(thumb_files))
2533
2534         infofn = self.prepare_filename(info_dict, 'infojson')
2535         _infojson_written = self._write_info_json('video', info_dict, infofn)
2536         if _infojson_written:
2537             info_dict['__infojson_filename'] = infofn
2538         elif _infojson_written is None:
2539             return
2540
2541         # Note: Annotations are deprecated
2542         annofn = None
2543         if self.params.get('writeannotations', False):
2544             annofn = self.prepare_filename(info_dict, 'annotation')
2545         if annofn:
2546             if not self._ensure_dir_exists(encodeFilename(annofn)):
2547                 return
2548             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2549                 self.to_screen('[info] Video annotations are already present')
2550             elif not info_dict.get('annotations'):
2551                 self.report_warning('There are no annotations to write.')
2552             else:
2553                 try:
2554                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2555                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2556                         annofile.write(info_dict['annotations'])
2557                 except (KeyError, TypeError):
2558                     self.report_warning('There are no annotations to write.')
2559                 except (OSError, IOError):
2560                     self.report_error('Cannot write annotations file: ' + annofn)
2561                     return
2562
2563         # Write internet shortcut files
2564         url_link = webloc_link = desktop_link = False
2565         if self.params.get('writelink', False):
2566             if sys.platform == "darwin":  # macOS.
2567                 webloc_link = True
2568             elif sys.platform.startswith("linux"):
2569                 desktop_link = True
2570             else:  # if sys.platform in ['win32', 'cygwin']:
2571                 url_link = True
2572         if self.params.get('writeurllink', False):
2573             url_link = True
2574         if self.params.get('writewebloclink', False):
2575             webloc_link = True
2576         if self.params.get('writedesktoplink', False):
2577             desktop_link = True
2578
2579         if url_link or webloc_link or desktop_link:
2580             if 'webpage_url' not in info_dict:
2581                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2582                 return
2583             ascii_url = iri_to_uri(info_dict['webpage_url'])
2584
2585         def _write_link_file(extension, template, newline, embed_filename):
2586             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2587             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2588                 self.to_screen('[info] Internet shortcut is already present')
2589             else:
2590                 try:
2591                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2592                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2593                         template_vars = {'url': ascii_url}
2594                         if embed_filename:
2595                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2596                         linkfile.write(template % template_vars)
2597                 except (OSError, IOError):
2598                     self.report_error('Cannot write internet shortcut ' + linkfn)
2599                     return False
2600             return True
2601
2602         if url_link:
2603             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2604                 return
2605         if webloc_link:
2606             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2607                 return
2608         if desktop_link:
2609             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2610                 return
2611
2612         try:
2613             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2614         except PostProcessingError as err:
2615             self.report_error('Preprocessing: %s' % str(err))
2616             return
2617
2618         must_record_download_archive = False
2619         if self.params.get('skip_download', False):
2620             info_dict['filepath'] = temp_filename
2621             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2622             info_dict['__files_to_move'] = files_to_move
2623             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2624         else:
2625             # Download
2626             info_dict.setdefault('__postprocessors', [])
2627             try:
2628
2629                 def existing_file(*filepaths):
2630                     ext = info_dict.get('ext')
2631                     final_ext = self.params.get('final_ext', ext)
2632                     existing_files = []
2633                     for file in orderedSet(filepaths):
2634                         if final_ext != ext:
2635                             converted = replace_extension(file, final_ext, ext)
2636                             if os.path.exists(encodeFilename(converted)):
2637                                 existing_files.append(converted)
2638                         if os.path.exists(encodeFilename(file)):
2639                             existing_files.append(file)
2640
2641                     if not existing_files or self.params.get('overwrites', False):
2642                         for file in orderedSet(existing_files):
2643                             self.report_file_delete(file)
2644                             os.remove(encodeFilename(file))
2645                         return None
2646
2647                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2648                     return existing_files[0]
2649
2650                 success = True
2651                 if info_dict.get('requested_formats') is not None:
2652
2653                     def compatible_formats(formats):
2654                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2655                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2656                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2657                         if len(video_formats) > 2 or len(audio_formats) > 2:
2658                             return False
2659
2660                         # Check extension
2661                         exts = set(format.get('ext') for format in formats)
2662                         COMPATIBLE_EXTS = (
2663                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2664                             set(('webm',)),
2665                         )
2666                         for ext_sets in COMPATIBLE_EXTS:
2667                             if ext_sets.issuperset(exts):
2668                                 return True
2669                         # TODO: Check acodec/vcodec
2670                         return False
2671
2672                     requested_formats = info_dict['requested_formats']
2673                     old_ext = info_dict['ext']
2674                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2675                         info_dict['ext'] = 'mkv'
2676                         self.report_warning(
2677                             'Requested formats are incompatible for merge and will be merged into mkv.')
2678                     new_ext = info_dict['ext']
2679
2680                     def correct_ext(filename, ext=new_ext):
2681                         if filename == '-':
2682                             return filename
2683                         filename_real_ext = os.path.splitext(filename)[1][1:]
2684                         filename_wo_ext = (
2685                             os.path.splitext(filename)[0]
2686                             if filename_real_ext in (old_ext, new_ext)
2687                             else filename)
2688                         return '%s.%s' % (filename_wo_ext, ext)
2689
2690                     # Ensure filename always has a correct extension for successful merge
2691                     full_filename = correct_ext(full_filename)
2692                     temp_filename = correct_ext(temp_filename)
2693                     dl_filename = existing_file(full_filename, temp_filename)
2694                     info_dict['__real_download'] = False
2695
2696                     _protocols = set(determine_protocol(f) for f in requested_formats)
2697                     if len(_protocols) == 1:  # All requested formats have same protocol
2698                         info_dict['protocol'] = _protocols.pop()
2699                     directly_mergable = FFmpegFD.can_merge_formats(info_dict, self.params)
2700                     if dl_filename is not None:
2701                         self.report_file_already_downloaded(dl_filename)
2702                     elif (directly_mergable and get_suitable_downloader(
2703                             info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
2704                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2705                         success, real_download = self.dl(temp_filename, info_dict)
2706                         info_dict['__real_download'] = real_download
2707                     else:
2708                         downloaded = []
2709                         merger = FFmpegMergerPP(self)
2710                         if self.params.get('allow_unplayable_formats'):
2711                             self.report_warning(
2712                                 'You have requested merging of multiple formats '
2713                                 'while also allowing unplayable formats to be downloaded. '
2714                                 'The formats won\'t be merged to prevent data corruption.')
2715                         elif not merger.available:
2716                             self.report_warning(
2717                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2718                                 'The formats won\'t be merged.')
2719
2720                         if temp_filename == '-':
2721                             reason = ('using a downloader other than ffmpeg' if directly_mergable
2722                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2723                                       else 'but ffmpeg is not installed')
2724                             self.report_warning(
2725                                 f'You have requested downloading multiple formats to stdout {reason}. '
2726                                 'The formats will be streamed one after the other')
2727                             fname = temp_filename
2728                         for f in requested_formats:
2729                             new_info = dict(info_dict)
2730                             del new_info['requested_formats']
2731                             new_info.update(f)
2732                             if temp_filename != '-':
2733                                 fname = prepend_extension(
2734                                     correct_ext(temp_filename, new_info['ext']),
2735                                     'f%s' % f['format_id'], new_info['ext'])
2736                                 if not self._ensure_dir_exists(fname):
2737                                     return
2738                                 f['filepath'] = fname
2739                                 downloaded.append(fname)
2740                             partial_success, real_download = self.dl(fname, new_info)
2741                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2742                             success = success and partial_success
2743                         if merger.available and not self.params.get('allow_unplayable_formats'):
2744                             info_dict['__postprocessors'].append(merger)
2745                             info_dict['__files_to_merge'] = downloaded
2746                             # Even if there were no downloads, it is being merged only now
2747                             info_dict['__real_download'] = True
2748                         else:
2749                             for file in downloaded:
2750                                 files_to_move[file] = None
2751                 else:
2752                     # Just a single file
2753                     dl_filename = existing_file(full_filename, temp_filename)
2754                     if dl_filename is None or dl_filename == temp_filename:
2755                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2756                         # So we should try to resume the download
2757                         success, real_download = self.dl(temp_filename, info_dict)
2758                         info_dict['__real_download'] = real_download
2759                     else:
2760                         self.report_file_already_downloaded(dl_filename)
2761
2762                 dl_filename = dl_filename or temp_filename
2763                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2764
2765             except network_exceptions as err:
2766                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2767                 return
2768             except (OSError, IOError) as err:
2769                 raise UnavailableVideoError(err)
2770             except (ContentTooShortError, ) as err:
2771                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2772                 return
2773
2774             if success and full_filename != '-':
2775
2776                 def fixup():
2777                     do_fixup = True
2778                     fixup_policy = self.params.get('fixup')
2779                     vid = info_dict['id']
2780
2781                     if fixup_policy in ('ignore', 'never'):
2782                         return
2783                     elif fixup_policy == 'warn':
2784                         do_fixup = False
2785                     elif fixup_policy != 'force':
2786                         assert fixup_policy in ('detect_or_warn', None)
2787                         if not info_dict.get('__real_download'):
2788                             do_fixup = False
2789
2790                     def ffmpeg_fixup(cndn, msg, cls):
2791                         if not cndn:
2792                             return
2793                         if not do_fixup:
2794                             self.report_warning(f'{vid}: {msg}')
2795                             return
2796                         pp = cls(self)
2797                         if pp.available:
2798                             info_dict['__postprocessors'].append(pp)
2799                         else:
2800                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2801
2802                     stretched_ratio = info_dict.get('stretched_ratio')
2803                     ffmpeg_fixup(
2804                         stretched_ratio not in (1, None),
2805                         f'Non-uniform pixel ratio {stretched_ratio}',
2806                         FFmpegFixupStretchedPP)
2807
2808                     ffmpeg_fixup(
2809                         (info_dict.get('requested_formats') is None
2810                          and info_dict.get('container') == 'm4a_dash'
2811                          and info_dict.get('ext') == 'm4a'),
2812                         'writing DASH m4a. Only some players support this container',
2813                         FFmpegFixupM4aPP)
2814
2815                     downloader = (get_suitable_downloader(info_dict, self.params).__name__
2816                                   if 'protocol' in info_dict else None)
2817                     ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2818                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2819                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2820
2821                 fixup()
2822                 try:
2823                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2824                 except PostProcessingError as err:
2825                     self.report_error('Postprocessing: %s' % str(err))
2826                     return
2827                 try:
2828                     for ph in self._post_hooks:
2829                         ph(info_dict['filepath'])
2830                 except Exception as err:
2831                     self.report_error('post hooks: %s' % str(err))
2832                     return
2833                 must_record_download_archive = True
2834
2835         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2836             self.record_download_archive(info_dict)
2837         max_downloads = self.params.get('max_downloads')
2838         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2839             raise MaxDownloadsReached()
2840
2841     def download(self, url_list):
2842         """Download a given list of URLs."""
2843         outtmpl = self.outtmpl_dict['default']
2844         if (len(url_list) > 1
2845                 and outtmpl != '-'
2846                 and '%' not in outtmpl
2847                 and self.params.get('max_downloads') != 1):
2848             raise SameFileError(outtmpl)
2849
2850         for url in url_list:
2851             try:
2852                 # It also downloads the videos
2853                 res = self.extract_info(
2854                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2855             except UnavailableVideoError:
2856                 self.report_error('unable to download video')
2857             except MaxDownloadsReached:
2858                 self.to_screen('[info] Maximum number of downloads reached')
2859                 raise
2860             except ExistingVideoReached:
2861                 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
2862                 raise
2863             except RejectedVideoReached:
2864                 self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
2865                 raise
2866             else:
2867                 if self.params.get('dump_single_json', False):
2868                     self.post_extract(res)
2869                     self.to_stdout(json.dumps(self.sanitize_info(res)))
2870
2871         return self._download_retcode
2872
2873     def download_with_info_file(self, info_filename):
2874         with contextlib.closing(fileinput.FileInput(
2875                 [info_filename], mode='r',
2876                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2877             # FileInput doesn't have a read method, we can't call json.load
2878             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2879         try:
2880             self.process_ie_result(info, download=True)
2881         except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2882             webpage_url = info.get('webpage_url')
2883             if webpage_url is not None:
2884                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2885                 return self.download([webpage_url])
2886             else:
2887                 raise
2888         return self._download_retcode
2889
2890     @staticmethod
2891     def sanitize_info(info_dict, remove_private_keys=False):
2892         ''' Sanitize the infodict for converting to json '''
2893         if info_dict is None:
2894             return info_dict
2895         info_dict.setdefault('epoch', int(time.time()))
2896         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
2897         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
2898         if remove_private_keys:
2899             remove_keys |= {
2900                 'requested_formats', 'requested_subtitles', 'requested_entries',
2901                 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2902             }
2903             empty_values = (None, {}, [], set(), tuple())
2904             reject = lambda k, v: k not in keep_keys and (
2905                 k.startswith('_') or k in remove_keys or v in empty_values)
2906         else:
2907             reject = lambda k, v: k in remove_keys
2908         filter_fn = lambda obj: (
2909             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2910             else obj if not isinstance(obj, dict)
2911             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2912         return filter_fn(info_dict)
2913
2914     @staticmethod
2915     def filter_requested_info(info_dict, actually_filter=True):
2916         ''' Alias of sanitize_info for backward compatibility '''
2917         return YoutubeDL.sanitize_info(info_dict, actually_filter)
2918
2919     def run_pp(self, pp, infodict):
2920         files_to_delete = []
2921         if '__files_to_move' not in infodict:
2922             infodict['__files_to_move'] = {}
2923         try:
2924             files_to_delete, infodict = pp.run(infodict)
2925         except PostProcessingError as e:
2926             # Must be True and not 'only_download'
2927             if self.params.get('ignoreerrors') is True:
2928                 self.report_error(e)
2929                 return infodict
2930             raise
2931
2932         if not files_to_delete:
2933             return infodict
2934         if self.params.get('keepvideo', False):
2935             for f in files_to_delete:
2936                 infodict['__files_to_move'].setdefault(f, '')
2937         else:
2938             for old_filename in set(files_to_delete):
2939                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2940                 try:
2941                     os.remove(encodeFilename(old_filename))
2942                 except (IOError, OSError):
2943                     self.report_warning('Unable to remove downloaded original file')
2944                 if old_filename in infodict['__files_to_move']:
2945                     del infodict['__files_to_move'][old_filename]
2946         return infodict
2947
2948     @staticmethod
2949     def post_extract(info_dict):
2950         def actual_post_extract(info_dict):
2951             if info_dict.get('_type') in ('playlist', 'multi_video'):
2952                 for video_dict in info_dict.get('entries', {}):
2953                     actual_post_extract(video_dict or {})
2954                 return
2955
2956             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2957             extra = post_extractor().items()
2958             info_dict.update(extra)
2959             info_dict.pop('__post_extractor', None)
2960
2961             original_infodict = info_dict.get('__original_infodict') or {}
2962             original_infodict.update(extra)
2963             original_infodict.pop('__post_extractor', None)
2964
2965         actual_post_extract(info_dict or {})
2966
2967     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2968         info = dict(ie_info)
2969         info['__files_to_move'] = files_to_move or {}
2970         for pp in self._pps[key]:
2971             info = self.run_pp(pp, info)
2972         return info, info.pop('__files_to_move', None)
2973
2974     def post_process(self, filename, ie_info, files_to_move=None):
2975         """Run all the postprocessors on the given file."""
2976         info = dict(ie_info)
2977         info['filepath'] = filename
2978         info['__files_to_move'] = files_to_move or {}
2979
2980         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2981             info = self.run_pp(pp, info)
2982         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2983         del info['__files_to_move']
2984         for pp in self._pps['after_move']:
2985             info = self.run_pp(pp, info)
2986         return info
2987
2988     def _make_archive_id(self, info_dict):
2989         video_id = info_dict.get('id')
2990         if not video_id:
2991             return
2992         # Future-proof against any change in case
2993         # and backwards compatibility with prior versions
2994         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2995         if extractor is None:
2996             url = str_or_none(info_dict.get('url'))
2997             if not url:
2998                 return
2999             # Try to find matching extractor for the URL and take its ie_key
3000             for ie_key, ie in self._ies.items():
3001                 if ie.suitable(url):
3002                     extractor = ie_key
3003                     break
3004             else:
3005                 return
3006         return '%s %s' % (extractor.lower(), video_id)
3007
3008     def in_download_archive(self, info_dict):
3009         fn = self.params.get('download_archive')
3010         if fn is None:
3011             return False
3012
3013         vid_id = self._make_archive_id(info_dict)
3014         if not vid_id:
3015             return False  # Incomplete video information
3016
3017         return vid_id in self.archive
3018
3019     def record_download_archive(self, info_dict):
3020         fn = self.params.get('download_archive')
3021         if fn is None:
3022             return
3023         vid_id = self._make_archive_id(info_dict)
3024         assert vid_id
3025         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3026             archive_file.write(vid_id + '\n')
3027         self.archive.add(vid_id)
3028
3029     @staticmethod
3030     def format_resolution(format, default='unknown'):
3031         if format.get('vcodec') == 'none':
3032             if format.get('acodec') == 'none':
3033                 return 'images'
3034             return 'audio only'
3035         if format.get('resolution') is not None:
3036             return format['resolution']
3037         if format.get('width') and format.get('height'):
3038             res = '%dx%d' % (format['width'], format['height'])
3039         elif format.get('height'):
3040             res = '%sp' % format['height']
3041         elif format.get('width'):
3042             res = '%dx?' % format['width']
3043         else:
3044             res = default
3045         return res
3046
3047     def _format_note(self, fdict):
3048         res = ''
3049         if fdict.get('ext') in ['f4f', 'f4m']:
3050             res += '(unsupported) '
3051         if fdict.get('language'):
3052             if res:
3053                 res += ' '
3054             res += '[%s] ' % fdict['language']
3055         if fdict.get('format_note') is not None:
3056             res += fdict['format_note'] + ' '
3057         if fdict.get('tbr') is not None:
3058             res += '%4dk ' % fdict['tbr']
3059         if fdict.get('container') is not None:
3060             if res:
3061                 res += ', '
3062             res += '%s container' % fdict['container']
3063         if (fdict.get('vcodec') is not None
3064                 and fdict.get('vcodec') != 'none'):
3065             if res:
3066                 res += ', '
3067             res += fdict['vcodec']
3068             if fdict.get('vbr') is not None:
3069                 res += '@'
3070         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3071             res += 'video@'
3072         if fdict.get('vbr') is not None:
3073             res += '%4dk' % fdict['vbr']
3074         if fdict.get('fps') is not None:
3075             if res:
3076                 res += ', '
3077             res += '%sfps' % fdict['fps']
3078         if fdict.get('acodec') is not None:
3079             if res:
3080                 res += ', '
3081             if fdict['acodec'] == 'none':
3082                 res += 'video only'
3083             else:
3084                 res += '%-5s' % fdict['acodec']
3085         elif fdict.get('abr') is not None:
3086             if res:
3087                 res += ', '
3088             res += 'audio'
3089         if fdict.get('abr') is not None:
3090             res += '@%3dk' % fdict['abr']
3091         if fdict.get('asr') is not None:
3092             res += ' (%5dHz)' % fdict['asr']
3093         if fdict.get('filesize') is not None:
3094             if res:
3095                 res += ', '
3096             res += format_bytes(fdict['filesize'])
3097         elif fdict.get('filesize_approx') is not None:
3098             if res:
3099                 res += ', '
3100             res += '~' + format_bytes(fdict['filesize_approx'])
3101         return res
3102
3103     def list_formats(self, info_dict):
3104         formats = info_dict.get('formats', [info_dict])
3105         new_format = (
3106             'list-formats' not in self.params.get('compat_opts', [])
3107             and self.params.get('listformats_table', True) is not False)
3108         if new_format:
3109             table = [
3110                 [
3111                     format_field(f, 'format_id'),
3112                     format_field(f, 'ext'),
3113                     self.format_resolution(f),
3114                     format_field(f, 'fps', '%d'),
3115                     '|',
3116                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3117                     format_field(f, 'tbr', '%4dk'),
3118                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3119                     '|',
3120                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3121                     format_field(f, 'vbr', '%4dk'),
3122                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3123                     format_field(f, 'abr', '%3dk'),
3124                     format_field(f, 'asr', '%5dHz'),
3125                     ', '.join(filter(None, (
3126                         'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3127                         format_field(f, 'language', '[%s]'),
3128                         format_field(f, 'format_note'),
3129                         format_field(f, 'container', ignore=(None, f.get('ext'))),
3130                     ))),
3131                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3132             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
3133                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
3134         else:
3135             table = [
3136                 [
3137                     format_field(f, 'format_id'),
3138                     format_field(f, 'ext'),
3139                     self.format_resolution(f),
3140                     self._format_note(f)]
3141                 for f in formats
3142                 if f.get('preference') is None or f['preference'] >= -1000]
3143             header_line = ['format code', 'extension', 'resolution', 'note']
3144
3145         self.to_screen(
3146             '[info] Available formats for %s:' % info_dict['id'])
3147         self.to_stdout(render_table(
3148             header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3149
3150     def list_thumbnails(self, info_dict):
3151         thumbnails = list(info_dict.get('thumbnails'))
3152         if not thumbnails:
3153             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3154             return
3155
3156         self.to_screen(
3157             '[info] Thumbnails for %s:' % info_dict['id'])
3158         self.to_stdout(render_table(
3159             ['ID', 'width', 'height', 'URL'],
3160             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3161
3162     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3163         if not subtitles:
3164             self.to_screen('%s has no %s' % (video_id, name))
3165             return
3166         self.to_screen(
3167             'Available %s for %s:' % (name, video_id))
3168
3169         def _row(lang, formats):
3170             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3171             if len(set(names)) == 1:
3172                 names = [] if names[0] == 'unknown' else names[:1]
3173             return [lang, ', '.join(names), ', '.join(exts)]
3174
3175         self.to_stdout(render_table(
3176             ['Language', 'Name', 'Formats'],
3177             [_row(lang, formats) for lang, formats in subtitles.items()],
3178             hideEmpty=True))
3179
3180     def urlopen(self, req):
3181         """ Start an HTTP download """
3182         if isinstance(req, compat_basestring):
3183             req = sanitized_Request(req)
3184         return self._opener.open(req, timeout=self._socket_timeout)
3185
3186     def print_debug_header(self):
3187         if not self.params.get('verbose'):
3188             return
3189
3190         stdout_encoding = getattr(
3191             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
3192         encoding_str = (
3193             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3194                 locale.getpreferredencoding(),
3195                 sys.getfilesystemencoding(),
3196                 stdout_encoding,
3197                 self.get_encoding()))
3198         write_string(encoding_str, encoding=None)
3199
3200         source = detect_variant()
3201         self._write_string('[debug] yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
3202         if _LAZY_LOADER:
3203             self._write_string('[debug] Lazy loading extractors enabled\n')
3204         if _PLUGIN_CLASSES:
3205             self._write_string(
3206                 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
3207         if self.params.get('compat_opts'):
3208             self._write_string(
3209                 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3210         try:
3211             sp = subprocess.Popen(
3212                 ['git', 'rev-parse', '--short', 'HEAD'],
3213                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3214                 cwd=os.path.dirname(os.path.abspath(__file__)))
3215             out, err = process_communicate_or_kill(sp)
3216             out = out.decode().strip()
3217             if re.match('[0-9a-f]+', out):
3218                 self._write_string('[debug] Git HEAD: %s\n' % out)
3219         except Exception:
3220             try:
3221                 sys.exc_clear()
3222             except Exception:
3223                 pass
3224
3225         def python_implementation():
3226             impl_name = platform.python_implementation()
3227             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3228                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3229             return impl_name
3230
3231         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3232             platform.python_version(),
3233             python_implementation(),
3234             platform.architecture()[0],
3235             platform_name()))
3236
3237         exe_versions = FFmpegPostProcessor.get_versions(self)
3238         exe_versions['rtmpdump'] = rtmpdump_version()
3239         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3240         exe_str = ', '.join(
3241             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3242         ) or 'none'
3243         self._write_string('[debug] exe versions: %s\n' % exe_str)
3244
3245         from .downloader.websocket import has_websockets
3246         from .postprocessor.embedthumbnail import has_mutagen
3247         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3248
3249         lib_str = ', '.join(sorted(filter(None, (
3250             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3251             has_websockets and 'websockets',
3252             has_mutagen and 'mutagen',
3253             SQLITE_AVAILABLE and 'sqlite',
3254             KEYRING_AVAILABLE and 'keyring',
3255         )))) or 'none'
3256         self._write_string('[debug] Optional libraries: %s\n' % lib_str)
3257
3258         proxy_map = {}
3259         for handler in self._opener.handlers:
3260             if hasattr(handler, 'proxies'):
3261                 proxy_map.update(handler.proxies)
3262         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
3263
3264         if self.params.get('call_home', False):
3265             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3266             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
3267             return
3268             latest_version = self.urlopen(
3269                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3270             if version_tuple(latest_version) > version_tuple(__version__):
3271                 self.report_warning(
3272                     'You are using an outdated version (newest version: %s)! '
3273                     'See https://yt-dl.org/update if you need help updating.' %
3274                     latest_version)
3275
3276     def _setup_opener(self):
3277         timeout_val = self.params.get('socket_timeout')
3278         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3279
3280         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3281         opts_cookiefile = self.params.get('cookiefile')
3282         opts_proxy = self.params.get('proxy')
3283
3284         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3285
3286         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3287         if opts_proxy is not None:
3288             if opts_proxy == '':
3289                 proxies = {}
3290             else:
3291                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3292         else:
3293             proxies = compat_urllib_request.getproxies()
3294             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3295             if 'http' in proxies and 'https' not in proxies:
3296                 proxies['https'] = proxies['http']
3297         proxy_handler = PerRequestProxyHandler(proxies)
3298
3299         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3300         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3301         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3302         redirect_handler = YoutubeDLRedirectHandler()
3303         data_handler = compat_urllib_request_DataHandler()
3304
3305         # When passing our own FileHandler instance, build_opener won't add the
3306         # default FileHandler and allows us to disable the file protocol, which
3307         # can be used for malicious purposes (see
3308         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3309         file_handler = compat_urllib_request.FileHandler()
3310
3311         def file_open(*args, **kwargs):
3312             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3313         file_handler.file_open = file_open
3314
3315         opener = compat_urllib_request.build_opener(
3316             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3317
3318         # Delete the default user-agent header, which would otherwise apply in
3319         # cases where our custom HTTP handler doesn't come into play
3320         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3321         opener.addheaders = []
3322         self._opener = opener
3323
3324     def encode(self, s):
3325         if isinstance(s, bytes):
3326             return s  # Already encoded
3327
3328         try:
3329             return s.encode(self.get_encoding())
3330         except UnicodeEncodeError as err:
3331             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3332             raise
3333
3334     def get_encoding(self):
3335         encoding = self.params.get('encoding')
3336         if encoding is None:
3337             encoding = preferredencoding()
3338         return encoding
3339
3340     def _write_info_json(self, label, ie_result, infofn):
3341         ''' Write infojson and returns True = written, False = skip, None = error '''
3342         if not self.params.get('writeinfojson'):
3343             return False
3344         elif not infofn:
3345             self.write_debug(f'Skipping writing {label} infojson')
3346             return False
3347         elif not self._ensure_dir_exists(infofn):
3348             return None
3349         elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3350             self.to_screen(f'[info] {label.title()} metadata is already present')
3351         else:
3352             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3353             try:
3354                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3355             except (OSError, IOError):
3356                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3357                 return None
3358         return True
3359
3360     def _write_description(self, label, ie_result, descfn):
3361         ''' Write description and returns True = written, False = skip, None = error '''
3362         if not self.params.get('writedescription'):
3363             return False
3364         elif not descfn:
3365             self.write_debug(f'Skipping writing {label} description')
3366             return False
3367         elif not self._ensure_dir_exists(descfn):
3368             return None
3369         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3370             self.to_screen(f'[info] {label.title()} description is already present')
3371         elif ie_result.get('description') is None:
3372             self.report_warning(f'There\'s no {label} description to write')
3373             return False
3374         else:
3375             try:
3376                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3377                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3378                     descfile.write(ie_result['description'])
3379             except (OSError, IOError):
3380                 self.report_error(f'Cannot write {label} description file {descfn}')
3381                 return None
3382         return True
3383
3384     def _write_subtitles(self, info_dict, filename):
3385         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3386         ret = []
3387         subtitles = info_dict.get('requested_subtitles')
3388         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3389             # subtitles download errors are already managed as troubles in relevant IE
3390             # that way it will silently go on when used with unsupporting IE
3391             return ret
3392
3393         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3394         if not sub_filename_base:
3395             self.to_screen('[info] Skipping writing video subtitles')
3396             return ret
3397         for sub_lang, sub_info in subtitles.items():
3398             sub_format = sub_info['ext']
3399             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3400             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3401             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3402                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3403                 sub_info['filepath'] = sub_filename
3404                 ret.append((sub_filename, sub_filename_final))
3405                 continue
3406
3407             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3408             if sub_info.get('data') is not None:
3409                 try:
3410                     # Use newline='' to prevent conversion of newline characters
3411                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3412                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3413                         subfile.write(sub_info['data'])
3414                     sub_info['filepath'] = sub_filename
3415                     ret.append((sub_filename, sub_filename_final))
3416                     continue
3417                 except (OSError, IOError):
3418                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3419                     return None
3420
3421             try:
3422                 sub_copy = sub_info.copy()
3423                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3424                 self.dl(sub_filename, sub_copy, subtitle=True)
3425                 sub_info['filepath'] = sub_filename
3426                 ret.append((sub_filename, sub_filename_final))
3427             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3428                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3429                 continue
3430             return ret
3431
3432     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3433         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3434         write_all = self.params.get('write_all_thumbnails', False)
3435         thumbnails, ret = [], []
3436         if write_all or self.params.get('writethumbnail', False):
3437             thumbnails = info_dict.get('thumbnails') or []
3438         multiple = write_all and len(thumbnails) > 1
3439
3440         if thumb_filename_base is None:
3441             thumb_filename_base = filename
3442         if thumbnails and not thumb_filename_base:
3443             self.write_debug(f'Skipping writing {label} thumbnail')
3444             return ret
3445
3446         for t in thumbnails[::-1]:
3447             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3448             thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3449             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3450             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3451
3452             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3453                 ret.append((thumb_filename, thumb_filename_final))
3454                 t['filepath'] = thumb_filename
3455                 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
3456             else:
3457                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3458                 try:
3459                     uf = self.urlopen(t['url'])
3460                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3461                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3462                         shutil.copyfileobj(uf, thumbf)
3463                     ret.append((thumb_filename, thumb_filename_final))
3464                     t['filepath'] = thumb_filename
3465                 except network_exceptions as err:
3466                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3467             if ret and not write_all:
3468                 break
3469         return ret