yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30 from zipimport import zipimporter
  31
  32 from .compat import (
  33     compat_basestring,
  34     compat_cookiejar,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_str,
  40     compat_tokenize_tokenize,
  41     compat_urllib_error,
  42     compat_urllib_request,
  43     compat_urllib_request_DataHandler,
  44 )
  45 from .utils import (
  46     age_restricted,
  47     args_to_str,
  48     ContentTooShortError,
  49     date_from_str,
  50     DateRange,
  51     DEFAULT_OUTTMPL,
  52     determine_ext,
  53     determine_protocol,
  54     DOT_DESKTOP_LINK_TEMPLATE,
  55     DOT_URL_LINK_TEMPLATE,
  56     DOT_WEBLOC_LINK_TEMPLATE,
  57     DownloadError,
  58     encode_compat_str,
  59     encodeFilename,
  60     EntryNotInPlaylist,
  61     error_to_compat_str,
  62     ExistingVideoReached,
  63     expand_path,
  64     ExtractorError,
  65     float_or_none,
  66     format_bytes,
  67     format_field,
  68     STR_FORMAT_RE,
  69     formatSeconds,
  70     GeoRestrictedError,
  71     HEADRequest,
  72     int_or_none,
  73     iri_to_uri,
  74     ISO3166Utils,
  75     LazyList,
  76     locked_file,
  77     make_dir,
  78     make_HTTPS_handler,
  79     MaxDownloadsReached,
  80     network_exceptions,
  81     orderedSet,
  82     OUTTMPL_TYPES,
  83     PagedList,
  84     parse_filesize,
  85     PerRequestProxyHandler,
  86     platform_name,
  87     PostProcessingError,
  88     preferredencoding,
  89     prepend_extension,
  90     process_communicate_or_kill,
  91     register_socks_protocols,
  92     RejectedVideoReached,
  93     render_table,
  94     replace_extension,
  95     SameFileError,
  96     sanitize_filename,
  97     sanitize_path,
  98     sanitize_url,
  99     sanitized_Request,
 100     std_headers,
 101     str_or_none,
 102     strftime_or_none,
 103     subtitles_filename,
 104     ThrottledDownload,
 105     to_high_limit_path,
 106     traverse_obj,
 107     try_get,
 108     UnavailableVideoError,
 109     url_basename,
 110     version_tuple,
 111     write_json_file,
 112     write_string,
 113     YoutubeDLCookieJar,
 114     YoutubeDLCookieProcessor,
 115     YoutubeDLHandler,
 116     YoutubeDLRedirectHandler,
 117 )
 118 from .cache import Cache
 119 from .extractor import (
 120     gen_extractor_classes,
 121     get_info_extractor,
 122     _LAZY_LOADER,
 123     _PLUGIN_CLASSES
 124 )
 125 from .extractor.openload import PhantomJSwrapper
 126 from .downloader import (
 127     get_suitable_downloader,
 128     shorten_protocol_name
 129 )
 130 from .downloader.rtmp import rtmpdump_version
 131 from .postprocessor import (
 132     get_postprocessor,
 133     FFmpegFixupDurationPP,
 134     FFmpegFixupM3u8PP,
 135     FFmpegFixupM4aPP,
 136     FFmpegFixupStretchedPP,
 137     FFmpegFixupTimestampPP,
 138     FFmpegMergerPP,
 139     FFmpegPostProcessor,
 140     MoveFilesAfterDownloadPP,
 141 )
 142 from .version import __version__
 143
 144 if compat_os_name == 'nt':
 145     import ctypes
 146
 147
 148 class YoutubeDL(object):
 149     """YoutubeDL class.
 150
    YoutubeDL objects are the ones responsible for downloading the
 152     actual video file and writing it to disk if the user has requested
 153     it, among some other tasks. In most cases there should be one per
 154     program. As, given a video URL, the downloader doesn't know how to
 155     extract all the needed information, task that InfoExtractors do, it
 156     has to pass the URL to one of them.
 157
 158     For this, YoutubeDL objects have a method that allows
 159     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
 161     finds that reports being able to handle it. The InfoExtractor extracts
 162     all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
 164     Downloader to download the video.
 165
 166     YoutubeDL objects accept a lot of parameters. In order not to saturate
 167     the object constructor with arguments, it receives a dictionary of
 168     options instead. These options are available through the params
 169     attribute for the InfoExtractors to use. The YoutubeDL also
 170     registers itself as the downloader in charge for the InfoExtractors
 171     that are added to it, so this is a "mutual registration".
 172
 173     Available options:
 174
 175     username:          Username for authentication purposes.
 176     password:          Password for authentication purposes.
 177     videopassword:     Password for accessing a video.
 178     ap_mso:            Adobe Pass multiple-system operator identifier.
 179     ap_username:       Multiple-system operator account username.
 180     ap_password:       Multiple-system operator account password.
 181     usenetrc:          Use netrc for authentication instead.
 182     verbose:           Print additional info to stdout.
 183     quiet:             Do not print messages to stdout.
 184     no_warnings:       Do not print out anything for warnings.
 185     forceprint:        A list of templates to force print
 186     forceurl:          Force printing final URL. (Deprecated)
 187     forcetitle:        Force printing title. (Deprecated)
 188     forceid:           Force printing ID. (Deprecated)
 189     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 190     forcedescription:  Force printing description. (Deprecated)
 191     forcefilename:     Force printing final filename. (Deprecated)
 192     forceduration:     Force printing duration. (Deprecated)
 193     forcejson:         Force printing info_dict as JSON.
 194     dump_single_json:  Force printing the info_dict of the whole playlist
 195                        (or video) as a single JSON line.
 196     force_write_download_archive: Force writing download archive regardless
 197                        of 'skip_download' or 'simulate'.
 198     simulate:          Do not download the video files.
 199     format:            Video format code. see "FORMAT SELECTION" for more details.
 200     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
 202                        extracting metadata even if the video is not actually
 203                        available for download (experimental)
 204     format_sort:       How to sort the video formats. see "Sorting Formats"
 205                        for more details.
 206     format_sort_force: Force the given format_sort. see "Sorting Formats"
 207                        for more details.
 208     allow_multiple_video_streams:   Allow multiple video streams to be merged
 209                        into a single file
 210     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 211                        into a single file
 212     paths:             Dictionary of output paths. The allowed keys are 'home'
 213                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 214     outtmpl:           Dictionary of templates for output names. Allowed keys
 215                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       A string is also accepted for backward compatibility
 217     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 218     restrictfilenames: Do not allow "&" and spaces in file names
 219     trim_file_name:    Limit length of filename (extension excluded)
 220     windowsfilenames:  Force the filenames to be windows compatible
 221     ignoreerrors:      Do not stop on download errors
 222                        (Default True when running yt-dlp,
 223                        but False when directly accessing YoutubeDL class)
 224     skip_playlist_after_errors: Number of allowed failures until the rest of
 225                        the playlist is skipped
 226     force_generic_extractor: Force downloader to use the generic extractor
 227     overwrites:        Overwrite all video and metadata files if True,
 228                        overwrite only non-video files if None
 229                        and don't overwrite any file if False
 230     playliststart:     Playlist item to start at.
 231     playlistend:       Playlist item to end at.
 232     playlist_items:    Specific indices of playlist to download.
 233     playlistreverse:   Download playlist items in reverse order.
 234     playlistrandom:    Download playlist items in random order.
 235     matchtitle:        Download only matching titles.
 236     rejecttitle:       Reject downloads for matching titles.
 237     logger:            Log messages to a logging.Logger instance.
 238     logtostderr:       Log messages to stderr instead of stdout.
 239     writedescription:  Write the video description to a .description file
 240     writeinfojson:     Write the video description to a .info.json file
 241     clean_infojson:    Remove private fields from the infojson
 242     writecomments:     Extract video comments. This will not be written to disk
 243                        unless writeinfojson is also given
 244     writeannotations:  Write the video annotations to a .annotations.xml file
 245     writethumbnail:    Write the thumbnail image to a file
 246     allow_playlist_files: Whether to write playlists' description, infojson etc
 247                        also to disk when using the 'write*' options
 248     write_all_thumbnails:  Write all thumbnail formats to files
 249     writelink:         Write an internet shortcut file, depending on the
 250                        current platform (.url/.webloc/.desktop)
 251     writeurllink:      Write a Windows internet shortcut file (.url)
 252     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 253     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 254     writesubtitles:    Write the video subtitles to a file
 255     writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 257                        Downloads all the subtitles of the video
 258                        (requires writesubtitles or writeautomaticsub)
 259     listsubtitles:     Lists all available subtitles for the video
 260     subtitlesformat:   The format code for subtitles
 261     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 262                        The list may contain "all" to refer to all the available
 263                        subtitles. The language can be prefixed with a "-" to
 264                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 265     keepvideo:         Keep the video file after post-processing
 266     daterange:         A DateRange object, download only if the upload_date is in the range.
 267     skip_download:     Skip the actual download of the video file
 268     cachedir:          Location of the cache files in the filesystem.
 269                        False to disable filesystem cache.
 270     noplaylist:        Download single video instead of a playlist if in doubt.
 271     age_limit:         An integer representing the user's age in years.
 272                        Unsuitable videos for the given age are skipped.
 273     min_views:         An integer representing the minimum view count the video
 274                        must have in order to not be skipped.
 275                        Videos without view count information are always
 276                        downloaded. None for no limit.
 277     max_views:         An integer representing the maximum view count.
 278                        Videos that are more popular than that are not
 279                        downloaded.
 280                        Videos without view count information are always
 281                        downloaded. None for no limit.
 282     download_archive:  File name of a file where all downloads are recorded.
 283                        Videos already present in the file are not downloaded
 284                        again.
 285     break_on_existing: Stop the download process after attempting to download a
 286                        file that is in the archive.
 287     break_on_reject:   Stop the download process when encountering a video that
 288                        has been filtered out.
 289     cookiefile:        File name where cookies should be read from and dumped to
 290     nocheckcertificate:Do not verify SSL certificates
 291     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 292                        At the moment, this is only supported by YouTube.
 293     proxy:             URL of the proxy server to use
 294     geo_verification_proxy:  URL of the proxy to use for IP address verification
 295                        on geo-restricted sites.
 296     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 297     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
 299     debug_printtraffic:Print out sent and received HTTP traffic
 300     include_ads:       Download ads as well
 301     default_search:    Prepend this string if an input url is not valid.
 302                        'auto' for elaborate guessing
 303     encoding:          Use this encoding instead of the system-specified.
 304     extract_flat:      Do not resolve URLs, return the immediate result.
 305                        Pass in 'in_playlist' to only show this behavior for
 306                        playlist items.
 307     postprocessors:    A list of dictionaries, each with an entry
 308                        * key:  The name of the postprocessor. See
 309                                yt_dlp/postprocessor/__init__.py for a list.
 310                        * when: When to run the postprocessor. Can be one of
 311                                pre_process|before_dl|post_process|after_move.
 312                                Assumed to be 'post_process' if not given
 313     post_hooks:        A list of functions that get called as the final step
 314                        for each video file, after all postprocessors have been
 315                        called. The filename will be passed as the only argument.
 316     progress_hooks:    A list of functions that get called on download
 317                        progress, with a dictionary with the entries
 318                        * status: One of "downloading", "error", or "finished".
 319                                  Check this first and ignore unknown values.
 320
 321                        If status is one of "downloading", or "finished", the
 322                        following properties may also be present:
 323                        * filename: The final filename (always present)
 324                        * tmpfilename: The filename we're currently writing to
 325                        * downloaded_bytes: Bytes on disk
 326                        * total_bytes: Size of the whole file, None if unknown
 327                        * total_bytes_estimate: Guess of the eventual file size,
 328                                                None if unavailable.
 329                        * elapsed: The number of seconds since download started.
 330                        * eta: The estimated time in seconds, None if unknown
 331                        * speed: The download speed in bytes/second, None if
 332                                 unknown
 333                        * fragment_index: The counter of the currently
 334                                          downloaded video fragment.
 335                        * fragment_count: The number of fragments (= individual
 336                                          files that will be merged)
 337
 338                        Progress hooks are guaranteed to be called at least once
 339                        (with status "finished") if the download is successful.
 340     merge_output_format: Extension to use when merging formats.
 341     final_ext:         Expected final extension; used to detect when the file was
 342                        already downloaded and converted. "merge_output_format" is
 343                        replaced by this extension when given
 344     fixup:             Automatically correct known faults of the file.
 345                        One of:
 346                        - "never": do nothing
 347                        - "warn": only emit a warning
 348                        - "detect_or_warn": check whether we can do anything
 349                                            about it, warn otherwise (default)
 350     source_address:    Client-side IP address to bind to.
 351     call_home:         Boolean, true iff we are allowed to contact the
 352                        yt-dlp servers for debugging. (BROKEN)
 353     sleep_interval_requests: Number of seconds to sleep between requests
 354                        during extraction
 355     sleep_interval:    Number of seconds to sleep before each download when
 356                        used alone or a lower bound of a range for randomized
 357                        sleep before each download (minimum possible number
 358                        of seconds to sleep) when used along with
 359                        max_sleep_interval.
 360     max_sleep_interval:Upper bound of a range for randomized sleep before each
 361                        download (maximum possible number of seconds to sleep).
 362                        Must only be used along with sleep_interval.
 363                        Actual sleep time will be a random float from range
 364                        [sleep_interval; max_sleep_interval].
 365     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 366     listformats:       Print an overview of available video formats and exit.
 367     list_thumbnails:   Print a table of all thumbnails and exit.
 368     match_filter:      A function that gets called with the info_dict of
 369                        every video.
 370                        If it returns a message, the video is ignored.
 371                        If it returns None, the video is downloaded.
 372                        match_filter_func in utils.py is one example for this.
 373     no_color:          Do not emit color codes in output.
 374     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 375                        HTTP header
 376     geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be used for
 378                        explicit geographic restriction bypassing via faking
 379                        X-Forwarded-For HTTP header
 380     geo_bypass_ip_block:
 381                        IP range in CIDR notation that will be used similarly to
 382                        geo_bypass_country
 383
 384     The following options determine which downloader is picked:
 385     external_downloader: A dictionary of protocol keys and the executable of the
 386                        external downloader to use for it. The allowed protocols
 387                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 388                        Set the value to 'native' to use the native downloader
 389     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 390                        or {'m3u8': 'ffmpeg'} instead.
 391                        Use the native HLS downloader instead of ffmpeg/avconv
 392                        if True, otherwise use ffmpeg/avconv if False, otherwise
 393                        use downloader suggested by extractor if None.
 394     compat_opts:       Compatibility options. See "Differences in default behavior".
 395                        The following options do not work when used through the API:
 396                        filename, abort-on-error, multistreams, no-live-chat,
 397                        no-playlist-metafiles. Refer __init__.py for their implementation
 398
 399     The following parameters are not used by YoutubeDL itself, they are used by
 400     the downloader (see yt_dlp/downloader/common.py):
 401     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 402     max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
 403     xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
 404
 405     The following options are used by the post processors:
 406     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 407                        otherwise prefer ffmpeg. (avconv support is deprecated)
 408     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 409                        to the binary or its containing directory.
 410     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 411                         and a list of additional command-line arguments for the
 412                         postprocessor/executable. The dict can also have "PP+EXE" keys
 413                         which are used when the given exe is used by the given PP.
                        Use 'default' as the name for arguments to be passed to all PPs
 415
 416     The following options are used by the extractors:
 417     extractor_retries: Number of times to retry for known errors
 418     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 419     hls_split_discontinuity: Split HLS playlists to different formats at
 420                        discontinuities such as ad breaks (default: False)
 421     extractor_args:    A dictionary of arguments to be passed to the extractors.
 422                        See "EXTRACTOR ARGUMENTS" for details.
 423                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 424     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 425                        If True (default), DASH manifests and related
 426                        data will be downloaded and processed by extractor.
 427                        You can reduce network I/O by disabling it if you don't
 428                        care about DASH. (only for youtube)
 429     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 430                        If True (default), HLS manifests and related
 431                        data will be downloaded and processed by extractor.
 432                        You can reduce network I/O by disabling it if you don't
 433                        care about HLS. (only for youtube)
 434     """
 435
    # Names of info-dict fields that are listed as numeric.
    # NOTE(review): the code that consumes this set is outside the visible part
    # of the file (presumably output-template handling) - confirm before
    # relying on a particular meaning.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level placeholders. __init__ rebinds params, _ies, _pps,
    # __prepare_filename_warned, _first_webpage_request, _download_retcode,
    # _num_downloads and _screen_file on every instance, so these values are
    # only seen on objects whose __init__ has not run.
    params = None
    _ies = []
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    # Double leading underscore: Python mangles this name with the class name.
    __prepare_filename_warned = False
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    # The two playlist attributes below are NOT rebound in __init__;
    # _playlist_urls is a mutable set defined on the class, so instances share
    # it unless code outside this view assigns a per-instance value.
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
 457
    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.

        params is a dictionary of options (see the class docstring); it is
        merged over the built-in defaults into self.params, so the caller's
        dictionary object itself is not stored. When auto_init is true, the
        debug header is printed and the default info extractors are
        registered.

        Besides initialising per-instance state, the constructor emits
        deprecation/compatibility warnings, optionally starts the
        bidi-workaround helper process, builds the output templates and the
        format selector, sets up the URL opener, preloads the download
        archive and registers the configured postprocessors and hooks.

        OSError raised while starting the bidi helper is re-raised unless its
        errno is ENOENT; IOError raised while reading the download archive is
        re-raised unless its errno is ENOENT.
        """
        if params is None:
            params = {}
        # Per-instance state; these rebinding assignments shadow the
        # class-level placeholders of the same names.
        self._ies = []
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self.__prepare_filename_warned = False
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # The 'logtostderr' value is used as a list index:
        # falsy -> sys.stdout, truthy -> sys.stderr.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        def check_deprecated(param, option, suggestion):
            # Warn when a deprecated parameter is set (i.e. is not None) and
            # report whether it was, so the caller can migrate its value.
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        # Carry the deprecated proxy option over to its replacement, but only
        # when the replacement has not been set explicitly.
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        # Emit any warning messages supplied under the 'warnings' key.
        for msg in self.params.get('warnings', []):
            self.report_warning(msg)

        # A given 'final_ext' replaces 'merge_output_format'.
        if self.params.get('final_ext'):
            if self.params.get('merge_output_format'):
                self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
            self.params['merge_output_format'] = self.params['final_ext']

        # An explicit None for 'overwrites' is dropped, making it
        # indistinguishable from the key being absent.
        if 'overwrites' in self.params and self.params['overwrites'] is None:
            del self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                # Imported lazily: only needed when the workaround is requested.
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                # The helper reads messages on its stdin and writes to the
                # slave end of the pty; the result is read back from the
                # master end via self._output_channel.
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # Starting 'bidiv' failed; try 'fribidi' instead.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # ENOENT only produces a warning and leaves _output_channel
                # unset; any other OSError is propagated.
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        # NOTE: the string below is a bare expression statement rather than a
        # docstring; it has no runtime effect.
        """Preload the archive, if any is specified"""
        def preload_download_archive(fn):
            # Add every stripped line of the archive file to self.archive.
            # Returns False when no file name is given or the file does not
            # exist (ENOENT), True once the file has been read.
            if fn is None:
                return False
            self.write_debug('Loading archive file %r\n' % fn)
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        # self.archive must exist before the preload, which adds to it.
        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            # Work on a copy so the pops below do not mutate the caller's dict.
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            # The remaining entries are passed as keyword arguments.
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
 591
 592     def warn_if_short_id(self, argv):
 593         # short YouTube ID starting with dash?
 594         idxs = [
 595             i for i, a in enumerate(argv)
 596             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 597         if idxs:
 598             correct_argv = (
 599                 ['yt-dlp']
 600                 + [a for i, a in enumerate(argv) if i not in idxs]
 601                 + ['--'] + [argv[i] for i in idxs]
 602             )
 603             self.report_warning(
 604                 'Long argument string detected. '
 605                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 606                 args_to_str(correct_argv))
 607
 608     def add_info_extractor(self, ie):
 609         """Add an InfoExtractor object to the end of the list."""
 610         self._ies.append(ie)
 611         if not isinstance(ie, type):
 612             self._ies_instances[ie.ie_key()] = ie
 613             ie.set_downloader(self)
 614
 615     def get_info_extractor(self, ie_key):
 616         """
 617         Get an instance of an IE with name ie_key, it will try to get one from
 618         the _ies list, if there's no instance it will create a new one and add
 619         it to the extractor list.
 620         """
 621         ie = self._ies_instances.get(ie_key)
 622         if ie is None:
 623             ie = get_info_extractor(ie_key)()
 624             self.add_info_extractor(ie)
 625         return ie
 626
 627     def add_default_info_extractors(self):
 628         """
 629         Add the InfoExtractors returned by gen_extractors to the end of the list
 630         """
 631         for ie in gen_extractor_classes():
 632             self.add_info_extractor(ie)
 633
 634     def add_post_processor(self, pp, when='post_process'):
 635         """Add a PostProcessor object to the end of the chain."""
 636         self._pps[when].append(pp)
 637         pp.set_downloader(self)
 638
 639     def add_post_hook(self, ph):
 640         """Add the post hook"""
 641         self._post_hooks.append(ph)
 642
 643     def add_progress_hook(self, ph):
 644         """Add the progress hook (currently only for the file downloader)"""
 645         self._progress_hooks.append(ph)
 646
 647     def _bidi_workaround(self, message):
 648         if not hasattr(self, '_output_channel'):
 649             return message
 650
 651         assert hasattr(self, '_output_process')
 652         assert isinstance(message, compat_str)
 653         line_count = message.count('\n') + 1
 654         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 655         self._output_process.stdin.flush()
 656         res = ''.join(self._output_channel.readline().decode('utf-8')
 657                       for _ in range(line_count))
 658         return res[:-len('\n')]
 659
 660     def _write_string(self, s, out=None):
 661         write_string(s, out=out, encoding=self.params.get('encoding'))
 662
 663     def to_stdout(self, message, skip_eol=False, quiet=False):
 664         """Print message to stdout"""
 665         if self.params.get('logger'):
 666             self.params['logger'].debug(message)
 667         elif not quiet or self.params.get('verbose'):
 668             self._write_string(
 669                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 670                 self._err_file if quiet else self._screen_file)
 671
 672     def to_stderr(self, message):
 673         """Print message to stderr"""
 674         assert isinstance(message, compat_str)
 675         if self.params.get('logger'):
 676             self.params['logger'].error(message)
 677         else:
 678             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
 679
 680     def to_console_title(self, message):
 681         if not self.params.get('consoletitle', False):
 682             return
 683         if compat_os_name == 'nt':
 684             if ctypes.windll.kernel32.GetConsoleWindow():
 685                 # c_wchar_p() might not be necessary if `message` is
 686                 # already of type unicode()
 687                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 688         elif 'TERM' in os.environ:
 689             self._write_string('\033]0;%s\007' % message, self._screen_file)
 690
 691     def save_console_title(self):
 692         if not self.params.get('consoletitle', False):
 693             return
 694         if self.params.get('simulate', False):
 695             return
 696         if compat_os_name != 'nt' and 'TERM' in os.environ:
 697             # Save the title on stack
 698             self._write_string('\033[22;0t', self._screen_file)
 699
 700     def restore_console_title(self):
 701         if not self.params.get('consoletitle', False):
 702             return
 703         if self.params.get('simulate', False):
 704             return
 705         if compat_os_name != 'nt' and 'TERM' in os.environ:
 706             # Restore the title from stack
 707             self._write_string('\033[23;0t', self._screen_file)
 708
 709     def __enter__(self):
 710         self.save_console_title()
 711         return self
 712
 713     def __exit__(self, *args):
 714         self.restore_console_title()
 715
 716         if self.params.get('cookiefile') is not None:
 717             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 718
 719     def trouble(self, message=None, tb=None):
 720         """Determine action to take when a download problem appears.
 721
 722         Depending on if the downloader has been configured to ignore
 723         download errors or not, this method may throw an exception or
 724         not when errors are found, after printing the message.
 725
 726         tb, if given, is additional traceback information.
 727         """
 728         if message is not None:
 729             self.to_stderr(message)
 730         if self.params.get('verbose'):
 731             if tb is None:
 732                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 733                     tb = ''
 734                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 735                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 736                     tb += encode_compat_str(traceback.format_exc())
 737                 else:
 738                     tb_data = traceback.format_list(traceback.extract_stack())
 739                     tb = ''.join(tb_data)
 740             if tb:
 741                 self.to_stderr(tb)
 742         if not self.params.get('ignoreerrors', False):
 743             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 744                 exc_info = sys.exc_info()[1].exc_info
 745             else:
 746                 exc_info = sys.exc_info()
 747             raise DownloadError(message, exc_info)
 748         self._download_retcode = 1
 749
 750     def to_screen(self, message, skip_eol=False):
 751         """Print message to stdout if not in quiet mode"""
 752         self.to_stdout(
 753             message, skip_eol, quiet=self.params.get('quiet', False))
 754
 755     def report_warning(self, message):
 756         '''
 757         Print the message to stderr, it will be prefixed with 'WARNING:'
 758         If stderr is a tty file the 'WARNING:' will be colored
 759         '''
 760         if self.params.get('logger') is not None:
 761             self.params['logger'].warning(message)
 762         else:
 763             if self.params.get('no_warnings'):
 764                 return
 765             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 766                 _msg_header = '\033[0;33mWARNING:\033[0m'
 767             else:
 768                 _msg_header = 'WARNING:'
 769             warning_message = '%s %s' % (_msg_header, message)
 770             self.to_stderr(warning_message)
 771
 772     def report_error(self, message, tb=None):
 773         '''
 774         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 775         in red if stderr is a tty file.
 776         '''
 777         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 778             _msg_header = '\033[0;31mERROR:\033[0m'
 779         else:
 780             _msg_header = 'ERROR:'
 781         error_message = '%s %s' % (_msg_header, message)
 782         self.trouble(error_message, tb)
 783
 784     def write_debug(self, message):
 785         '''Log debug message or Print message to stderr'''
 786         if not self.params.get('verbose', False):
 787             return
 788         message = '[debug] %s' % message
 789         if self.params.get('logger'):
 790             self.params['logger'].debug(message)
 791         else:
 792             self._write_string('%s\n' % message)
 793
 794     def report_file_already_downloaded(self, file_name):
 795         """Report file has already been fully downloaded."""
 796         try:
 797             self.to_screen('[download] %s has already been downloaded' % file_name)
 798         except UnicodeEncodeError:
 799             self.to_screen('[download] The file has already been downloaded')
 800
 801     def report_file_delete(self, file_name):
 802         """Report that existing file will be deleted."""
 803         try:
 804             self.to_screen('Deleting existing file %s' % file_name)
 805         except UnicodeEncodeError:
 806             self.to_screen('Deleting existing file')
 807
 808     def parse_outtmpl(self):
 809         outtmpl_dict = self.params.get('outtmpl', {})
 810         if not isinstance(outtmpl_dict, dict):
 811             outtmpl_dict = {'default': outtmpl_dict}
 812         outtmpl_dict.update({
 813             k: v for k, v in DEFAULT_OUTTMPL.items()
 814             if not outtmpl_dict.get(k)})
 815         for key, val in outtmpl_dict.items():
 816             if isinstance(val, bytes):
 817                 self.report_warning(
 818                     'Parameter outtmpl is bytes, but should be a unicode string. '
 819                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 820         return outtmpl_dict
 821
 822     def get_output_path(self, dir_type='', filename=None):
 823         paths = self.params.get('paths', {})
 824         assert isinstance(paths, dict)
 825         path = os.path.join(
 826             expand_path(paths.get('home', '').strip()),
 827             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 828             filename or '')
 829
 830         # Temporary fix for #4787
 831         # 'Treat' all problem characters by passing filename through preferredencoding
 832         # to workaround encoding issues with subprocess on python2 @ Windows
 833         if sys.version_info < (3, 0) and sys.platform == 'win32':
 834             path = encodeFilename(path, True).decode(preferredencoding())
 835         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 836
 837     @staticmethod
 838     def validate_outtmpl(tmpl):
 839         ''' @return None or Exception object '''
 840         try:
 841             re.sub(
 842                 STR_FORMAT_RE.format(''),
 843                 lambda mobj: ('%' if not mobj.group('has_key') else '') + mobj.group(0),
 844                 tmpl
 845             ) % collections.defaultdict(int)
 846             return None
 847         except ValueError as err:
 848             return err
 849
 850     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 851         """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
 852         info_dict = dict(info_dict)
 853         na = self.params.get('outtmpl_na_placeholder', 'NA')
 854
 855         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 856             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
 857             if info_dict.get('duration', None) is not None
 858             else None)
 859         info_dict['epoch'] = int(time.time())
 860         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 861         if info_dict.get('resolution') is None:
 862             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
 863
 864         # For fields playlist_index and autonumber convert all occurrences
 865         # of %(field)s to %(field)0Nd for backward compatibility
 866         field_size_compat_map = {
 867             'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
 868             'autonumber': self.params.get('autonumber_size') or 5,
 869         }
 870
 871         TMPL_DICT = {}
 872         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*'))
 873         MATH_FUNCTIONS = {
 874             '+': float.__add__,
 875             '-': float.__sub__,
 876         }
 877         # Field is of the form key1.key2...
 878         # where keys (except first) can be string, int or slice
 879         FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
 880         MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
 881         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
 882         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
 883             (?P<negate>-)?
 884             (?P<fields>{field})
 885             (?P<maths>(?:{math_op}{math_field})*)
 886             (?:>(?P<strf_format>.+?))?
 887             (?:\|(?P<default>.*?))?
 888             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
 889
 890         get_key = lambda k: traverse_obj(
 891             info_dict, k.split('.'), is_user_input=True, traverse_string=True)
 892
 893         def get_value(mdict):
 894             # Object traversal
 895             value = get_key(mdict['fields'])
 896             # Negative
 897             if mdict['negate']:
 898                 value = float_or_none(value)
 899                 if value is not None:
 900                     value *= -1
 901             # Do maths
 902             offset_key = mdict['maths']
 903             if offset_key:
 904                 value = float_or_none(value)
 905                 operator = None
 906                 while offset_key:
 907                     item = re.match(
 908                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
 909                         offset_key).group(0)
 910                     offset_key = offset_key[len(item):]
 911                     if operator is None:
 912                         operator = MATH_FUNCTIONS[item]
 913                         continue
 914                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
 915                     offset = float_or_none(item)
 916                     if offset is None:
 917                         offset = float_or_none(get_key(item))
 918                     try:
 919                         value = operator(value, multiplier * offset)
 920                     except (TypeError, ZeroDivisionError):
 921                         return None
 922                     operator = None
 923             # Datetime formatting
 924             if mdict['strf_format']:
 925                 value = strftime_or_none(value, mdict['strf_format'])
 926
 927             return value
 928
 929         def create_key(outer_mobj):
 930             if not outer_mobj.group('has_key'):
 931                 return '%{}'.format(outer_mobj.group(0))
 932
 933             key = outer_mobj.group('key')
 934             fmt = outer_mobj.group('format')
 935             mobj = re.match(INTERNAL_FORMAT_RE, key)
 936             if mobj is None:
 937                 value, default, mobj = None, na, {'fields': ''}
 938             else:
 939                 mobj = mobj.groupdict()
 940                 default = mobj['default'] if mobj['default'] is not None else na
 941                 value = get_value(mobj)
 942
 943             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
 944                 fmt = '0{:d}d'.format(field_size_compat_map[key])
 945
 946             value = default if value is None else value
 947
 948             if fmt == 'c':
 949                 value = compat_str(value)
 950                 if value is None:
 951                     value, fmt = default, 's'
 952                 else:
 953                     value = value[0]
 954             elif fmt[-1] not in 'rs':  # numeric
 955                 value = float_or_none(value)
 956                 if value is None:
 957                     value, fmt = default, 's'
 958             if sanitize:
 959                 if fmt[-1] == 'r':
 960                     # If value is an object, sanitize might convert it to a string
 961                     # So we convert it to repr first
 962                     value, fmt = repr(value), '%ss' % fmt[:-1]
 963                 if fmt[-1] in 'csr':
 964                     value = sanitize(mobj['fields'].split('.')[-1], value)
 965             key += '\0%s' % fmt
 966             TMPL_DICT[key] = value
 967             return '%({key}){fmt}'.format(key=key, fmt=fmt)
 968
 969         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
 970
 971     def _prepare_filename(self, info_dict, tmpl_type='default'):
 972         try:
 973             sanitize = lambda k, v: sanitize_filename(
 974                 compat_str(v),
 975                 restricted=self.params.get('restrictfilenames'),
 976                 is_id=(k == 'id' or k.endswith('_id')))
 977             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
 978             outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
 979
 980             # expand_path translates '%%' into '%' and '$$' into '$'
 981             # correspondingly that is not what we want since we need to keep
 982             # '%%' intact for template dict substitution step. Working around
 983             # with boundary-alike separator hack.
 984             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 985             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 986
 987             # outtmpl should be expand_path'ed before template dict substitution
 988             # because meta fields may contain env variables we don't want to
 989             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 990             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 991             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 992
 993             force_ext = OUTTMPL_TYPES.get(tmpl_type)
 994             if force_ext is not None:
 995                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
 996
 997             # https://github.com/blackjack4494/youtube-dlc/issues/85
 998             trim_file_name = self.params.get('trim_file_name', False)
 999             if trim_file_name:
1000                 fn_groups = filename.rsplit('.')
1001                 ext = fn_groups[-1]
1002                 sub_ext = ''
1003                 if len(fn_groups) > 2:
1004                     sub_ext = fn_groups[-2]
1005                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1006
1007             return filename
1008         except ValueError as err:
1009             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1010             return None
1011
1012     def prepare_filename(self, info_dict, dir_type='', warn=False):
1013         """Generate the output filename."""
1014
1015         filename = self._prepare_filename(info_dict, dir_type or 'default')
1016
1017         if warn and not self.__prepare_filename_warned:
1018             if not self.params.get('paths'):
1019                 pass
1020             elif filename == '-':
1021                 self.report_warning('--paths is ignored when an outputting to stdout')
1022             elif os.path.isabs(filename):
1023                 self.report_warning('--paths is ignored since an absolute path is given in output template')
1024             self.__prepare_filename_warned = True
1025         if filename == '-' or not filename:
1026             return filename
1027
1028         return self.get_output_path(dir_type, filename)
1029
1030     def _match_entry(self, info_dict, incomplete=False, silent=False):
1031         """ Returns None if the file should be downloaded """
1032
1033         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1034
1035         def check_filter():
1036             if 'title' in info_dict:
1037                 # This can happen when we're just evaluating the playlist
1038                 title = info_dict['title']
1039                 matchtitle = self.params.get('matchtitle', False)
1040                 if matchtitle:
1041                     if not re.search(matchtitle, title, re.IGNORECASE):
1042                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1043                 rejecttitle = self.params.get('rejecttitle', False)
1044                 if rejecttitle:
1045                     if re.search(rejecttitle, title, re.IGNORECASE):
1046                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1047             date = info_dict.get('upload_date')
1048             if date is not None:
1049                 dateRange = self.params.get('daterange', DateRange())
1050                 if date not in dateRange:
1051                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1052             view_count = info_dict.get('view_count')
1053             if view_count is not None:
1054                 min_views = self.params.get('min_views')
1055                 if min_views is not None and view_count < min_views:
1056                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1057                 max_views = self.params.get('max_views')
1058                 if max_views is not None and view_count > max_views:
1059                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1060             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1061                 return 'Skipping "%s" because it is age restricted' % video_title
1062
1063             if not incomplete:
1064                 match_filter = self.params.get('match_filter')
1065                 if match_filter is not None:
1066                     ret = match_filter(info_dict)
1067                     if ret is not None:
1068                         return ret
1069             return None
1070
1071         if self.in_download_archive(info_dict):
1072             reason = '%s has already been recorded in the archive' % video_title
1073             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1074         else:
1075             reason = check_filter()
1076             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1077         if reason is not None:
1078             if not silent:
1079                 self.to_screen('[download] ' + reason)
1080             if self.params.get(break_opt, False):
1081                 raise break_err()
1082         return reason
1083
1084     @staticmethod
1085     def add_extra_info(info_dict, extra_info):
1086         '''Set the keys from extra_info in info dict if they are missing'''
1087         for key, value in extra_info.items():
1088             info_dict.setdefault(key, value)
1089
1090     def extract_info(self, url, download=True, ie_key=None, extra_info={},
1091                      process=True, force_generic_extractor=False):
1092         """
1093         Return a list with a dictionary for each video extracted.
1094
1095         Arguments:
1096         url -- URL to extract
1097
1098         Keyword arguments:
1099         download -- whether to download videos during extraction
1100         ie_key -- extractor key hint
1101         extra_info -- dictionary containing the extra values to add to each result
1102         process -- whether to resolve all unresolved references (URLs, playlist items),
1103             must be True for download to work.
1104         force_generic_extractor -- force using the generic extractor
1105         """
1106
1107         if not ie_key and force_generic_extractor:
1108             ie_key = 'Generic'
1109
1110         if ie_key:
1111             ies = [self.get_info_extractor(ie_key)]
1112         else:
1113             ies = self._ies
1114
1115         for ie in ies:
1116             if not ie.suitable(url):
1117                 continue
1118
1119             ie_key = ie.ie_key()
1120             ie = self.get_info_extractor(ie_key)
1121             if not ie.working():
1122                 self.report_warning('The program functionality for this site has been marked as broken, '
1123                                     'and will probably not work.')
1124
1125             try:
1126                 temp_id = str_or_none(
1127                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1128                     else ie._match_id(url))
1129             except (AssertionError, IndexError, AttributeError):
1130                 temp_id = None
1131             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1132                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1133                                ie_key, temp_id))
1134                 break
1135             return self.__extract_info(url, ie, download, extra_info, process)
1136         else:
1137             self.report_error('no suitable InfoExtractor for URL %s' % url)
1138
1139     def __handle_extraction_exceptions(func):
1140         def wrapper(self, *args, **kwargs):
1141             try:
1142                 return func(self, *args, **kwargs)
1143             except GeoRestrictedError as e:
1144                 msg = e.msg
1145                 if e.countries:
1146                     msg += '\nThis video is available in %s.' % ', '.join(
1147                         map(ISO3166Utils.short2full, e.countries))
1148                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1149                 self.report_error(msg)
1150             except ExtractorError as e:  # An error we somewhat expected
1151                 self.report_error(compat_str(e), e.format_traceback())
1152             except ThrottledDownload:
1153                 self.to_stderr('\r')
1154                 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1155                 return wrapper(self, *args, **kwargs)
1156             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1157                 raise
1158             except Exception as e:
1159                 if self.params.get('ignoreerrors', False):
1160                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1161                 else:
1162                     raise
1163         return wrapper
1164
1165     @__handle_extraction_exceptions
1166     def __extract_info(self, url, ie, download, extra_info, process):
1167         ie_result = ie.extract(url)
1168         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1169             return
1170         if isinstance(ie_result, list):
1171             # Backwards compatibility: old IE result format
1172             ie_result = {
1173                 '_type': 'compat_list',
1174                 'entries': ie_result,
1175             }
1176         self.add_default_extra_info(ie_result, ie, url)
1177         if process:
1178             return self.process_ie_result(ie_result, download, extra_info)
1179         else:
1180             return ie_result
1181
1182     def add_default_extra_info(self, ie_result, ie, url):
1183         if url is not None:
1184             self.add_extra_info(ie_result, {
1185                 'webpage_url': url,
1186                 'original_url': url,
1187                 'webpage_url_basename': url_basename(url),
1188             })
1189         if ie is not None:
1190             self.add_extra_info(ie_result, {
1191                 'extractor': ie.IE_NAME,
1192                 'extractor_key': ie.ie_key(),
1193             })
1194
1195     def process_ie_result(self, ie_result, download=True, extra_info={}):
1196         """
1197         Take the result of the ie(may be modified) and resolve all unresolved
1198         references (URLs, playlist items).
1199
1200         It will also download the videos if 'download'.
1201         Returns the resolved ie_result.
1202         """
1203         result_type = ie_result.get('_type', 'video')
1204
1205         if result_type in ('url', 'url_transparent'):
1206             ie_result['url'] = sanitize_url(ie_result['url'])
1207             extract_flat = self.params.get('extract_flat', False)
1208             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1209                     or extract_flat is True):
1210                 info_copy = ie_result.copy()
1211                 self.add_extra_info(info_copy, extra_info)
1212                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1213                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1214                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1215                 return ie_result
1216
1217         if result_type == 'video':
1218             self.add_extra_info(ie_result, extra_info)
1219             ie_result = self.process_video_result(ie_result, download=download)
1220             additional_urls = (ie_result or {}).get('additional_urls')
1221             if additional_urls:
1222                 # TODO: Improve MetadataFromFieldPP to allow setting a list
1223                 if isinstance(additional_urls, compat_str):
1224                     additional_urls = [additional_urls]
1225                 self.to_screen(
1226                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1227                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1228                 ie_result['additional_entries'] = [
1229                     self.extract_info(
1230                         url, download, extra_info,
1231                         force_generic_extractor=self.params.get('force_generic_extractor'))
1232                     for url in additional_urls
1233                 ]
1234             return ie_result
1235         elif result_type == 'url':
1236             # We have to add extra_info to the results because it may be
1237             # contained in a playlist
1238             return self.extract_info(
1239                 ie_result['url'], download,
1240                 ie_key=ie_result.get('ie_key'),
1241                 extra_info=extra_info)
1242         elif result_type == 'url_transparent':
1243             # Use the information from the embedding page
1244             info = self.extract_info(
1245                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1246                 extra_info=extra_info, download=False, process=False)
1247
1248             # extract_info may return None when ignoreerrors is enabled and
1249             # extraction failed with an error, don't crash and return early
1250             # in this case
1251             if not info:
1252                 return info
1253
1254             force_properties = dict(
1255                 (k, v) for k, v in ie_result.items() if v is not None)
1256             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1257                 if f in force_properties:
1258                     del force_properties[f]
1259             new_result = info.copy()
1260             new_result.update(force_properties)
1261
1262             # Extracted info may not be a video result (i.e.
1263             # info.get('_type', 'video') != video) but rather an url or
1264             # url_transparent. In such cases outer metadata (from ie_result)
1265             # should be propagated to inner one (info). For this to happen
1266             # _type of info should be overridden with url_transparent. This
1267             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1268             if new_result.get('_type') == 'url':
1269                 new_result['_type'] = 'url_transparent'
1270
1271             return self.process_ie_result(
1272                 new_result, download=download, extra_info=extra_info)
1273         elif result_type in ('playlist', 'multi_video'):
1274             # Protect from infinite recursion due to recursively nested playlists
1275             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1276             webpage_url = ie_result['webpage_url']
1277             if webpage_url in self._playlist_urls:
1278                 self.to_screen(
1279                     '[download] Skipping already downloaded playlist: %s'
1280                     % ie_result.get('title') or ie_result.get('id'))
1281                 return
1282
1283             self._playlist_level += 1
1284             self._playlist_urls.add(webpage_url)
1285             self._sanitize_thumbnails(ie_result)
1286             try:
1287                 return self.__process_playlist(ie_result, download)
1288             finally:
1289                 self._playlist_level -= 1
1290                 if not self._playlist_level:
1291                     self._playlist_urls.clear()
1292         elif result_type == 'compat_list':
1293             self.report_warning(
1294                 'Extractor %s returned a compat_list result. '
1295                 'It needs to be updated.' % ie_result.get('extractor'))
1296
1297             def _fixup(r):
1298                 self.add_extra_info(
1299                     r,
1300                     {
1301                         'extractor': ie_result['extractor'],
1302                         'webpage_url': ie_result['webpage_url'],
1303                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1304                         'extractor_key': ie_result['extractor_key'],
1305                     }
1306                 )
1307                 return r
1308             ie_result['entries'] = [
1309                 self.process_ie_result(_fixup(r), download, extra_info)
1310                 for r in ie_result['entries']
1311             ]
1312             return ie_result
1313         else:
1314             raise Exception('Invalid result type: %s' % result_type)
1315
1316     def _ensure_dir_exists(self, path):
1317         return make_dir(path, self.report_error)
1318
1319     def __process_playlist(self, ie_result, download):
1320         # We process each entry in the playlist
1321         playlist = ie_result.get('title') or ie_result.get('id')
1322         self.to_screen('[download] Downloading playlist: %s' % playlist)
1323
1324         if 'entries' not in ie_result:
1325             raise EntryNotInPlaylist()
1326         incomplete_entries = bool(ie_result.get('requested_entries'))
1327         if incomplete_entries:
1328             def fill_missing_entries(entries, indexes):
1329                 ret = [None] * max(*indexes)
1330                 for i, entry in zip(indexes, entries):
1331                     ret[i - 1] = entry
1332                 return ret
1333             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1334
1335         playlist_results = []
1336
1337         playliststart = self.params.get('playliststart', 1)
1338         playlistend = self.params.get('playlistend')
1339         # For backwards compatibility, interpret -1 as whole list
1340         if playlistend == -1:
1341             playlistend = None
1342
1343         playlistitems_str = self.params.get('playlist_items')
1344         playlistitems = None
1345         if playlistitems_str is not None:
1346             def iter_playlistitems(format):
1347                 for string_segment in format.split(','):
1348                     if '-' in string_segment:
1349                         start, end = string_segment.split('-')
1350                         for item in range(int(start), int(end) + 1):
1351                             yield int(item)
1352                     else:
1353                         yield int(string_segment)
1354             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1355
1356         ie_entries = ie_result['entries']
1357         msg = (
1358             'Downloading %d videos' if not isinstance(ie_entries, list)
1359             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1360         if not isinstance(ie_entries, (list, PagedList)):
1361             ie_entries = LazyList(ie_entries)
1362
1363         entries = []
1364         for i in playlistitems or itertools.count(playliststart):
1365             if playlistitems is None and playlistend is not None and playlistend < i:
1366                 break
1367             entry = None
1368             try:
1369                 entry = ie_entries[i - 1]
1370                 if entry is None:
1371                     raise EntryNotInPlaylist()
1372             except (IndexError, EntryNotInPlaylist):
1373                 if incomplete_entries:
1374                     raise EntryNotInPlaylist()
1375                 elif not playlistitems:
1376                     break
1377             entries.append(entry)
1378             try:
1379                 if entry is not None:
1380                     self._match_entry(entry, incomplete=True, silent=True)
1381             except (ExistingVideoReached, RejectedVideoReached):
1382                 break
1383         ie_result['entries'] = entries
1384
1385         # Save playlist_index before re-ordering
1386         entries = [
1387             ((playlistitems[i - 1] if playlistitems else i), entry)
1388             for i, entry in enumerate(entries, 1)
1389             if entry is not None]
1390         n_entries = len(entries)
1391
1392         if not playlistitems and (playliststart or playlistend):
1393             playlistitems = list(range(playliststart, playliststart + n_entries))
1394         ie_result['requested_entries'] = playlistitems
1395
1396         if self.params.get('allow_playlist_files', True):
1397             ie_copy = {
1398                 'playlist': playlist,
1399                 'playlist_id': ie_result.get('id'),
1400                 'playlist_title': ie_result.get('title'),
1401                 'playlist_uploader': ie_result.get('uploader'),
1402                 'playlist_uploader_id': ie_result.get('uploader_id'),
1403                 'playlist_index': 0,
1404             }
1405             ie_copy.update(dict(ie_result))
1406
1407             if self.params.get('writeinfojson', False):
1408                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1409                 if not self._ensure_dir_exists(encodeFilename(infofn)):
1410                     return
1411                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1412                     self.to_screen('[info] Playlist metadata is already present')
1413                 else:
1414                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1415                     try:
1416                         write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1417                     except (OSError, IOError):
1418                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1419
1420             # TODO: This should be passed to ThumbnailsConvertor if necessary
1421             self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1422
1423             if self.params.get('writedescription', False):
1424                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1425                 if not self._ensure_dir_exists(encodeFilename(descfn)):
1426                     return
1427                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1428                     self.to_screen('[info] Playlist description is already present')
1429                 elif ie_result.get('description') is None:
1430                     self.report_warning('There\'s no playlist description to write.')
1431                 else:
1432                     try:
1433                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1434                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1435                             descfile.write(ie_result['description'])
1436                     except (OSError, IOError):
1437                         self.report_error('Cannot write playlist description file ' + descfn)
1438                         return
1439
1440         if self.params.get('playlistreverse', False):
1441             entries = entries[::-1]
1442         if self.params.get('playlistrandom', False):
1443             random.shuffle(entries)
1444
1445         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1446
1447         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1448         failures = 0
1449         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1450         for i, entry_tuple in enumerate(entries, 1):
1451             playlist_index, entry = entry_tuple
1452             if 'playlist_index' in self.params.get('compat_options', []):
1453                 playlist_index = playlistitems[i - 1] if playlistitems else i
1454             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1455             # This __x_forwarded_for_ip thing is a bit ugly but requires
1456             # minimal changes
1457             if x_forwarded_for:
1458                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1459             extra = {
1460                 'n_entries': n_entries,
1461                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1462                 'playlist_index': playlist_index,
1463                 'playlist_autonumber': i,
1464                 'playlist': playlist,
1465                 'playlist_id': ie_result.get('id'),
1466                 'playlist_title': ie_result.get('title'),
1467                 'playlist_uploader': ie_result.get('uploader'),
1468                 'playlist_uploader_id': ie_result.get('uploader_id'),
1469                 'extractor': ie_result['extractor'],
1470                 'webpage_url': ie_result['webpage_url'],
1471                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1472                 'extractor_key': ie_result['extractor_key'],
1473             }
1474
1475             if self._match_entry(entry, incomplete=True) is not None:
1476                 continue
1477
1478             entry_result = self.__process_iterable_entry(entry, download, extra)
1479             if not entry_result:
1480                 failures += 1
1481             if failures >= max_failures:
1482                 self.report_error(
1483                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1484                 break
1485             # TODO: skip failed (empty) entries?
1486             playlist_results.append(entry_result)
1487         ie_result['entries'] = playlist_results
1488         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1489         return ie_result
1490
1491     @__handle_extraction_exceptions
1492     def __process_iterable_entry(self, entry, download, extra_info):
1493         return self.process_ie_result(
1494             entry, download=download, extra_info=extra_info)
1495
1496     def _build_format_filter(self, filter_spec):
1497         " Returns a function to filter the formats according to the filter_spec "
1498
1499         OPERATORS = {
1500             '<': operator.lt,
1501             '<=': operator.le,
1502             '>': operator.gt,
1503             '>=': operator.ge,
1504             '=': operator.eq,
1505             '!=': operator.ne,
1506         }
1507         operator_rex = re.compile(r'''(?x)\s*
1508             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1509             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1510             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1511             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1512         m = operator_rex.fullmatch(filter_spec)
1513         if m:
1514             try:
1515                 comparison_value = int(m.group('value'))
1516             except ValueError:
1517                 comparison_value = parse_filesize(m.group('value'))
1518                 if comparison_value is None:
1519                     comparison_value = parse_filesize(m.group('value') + 'B')
1520                 if comparison_value is None:
1521                     raise ValueError(
1522                         'Invalid value %r in format specification %r' % (
1523                             m.group('value'), filter_spec))
1524             op = OPERATORS[m.group('op')]
1525
1526         if not m:
1527             STR_OPERATORS = {
1528                 '=': operator.eq,
1529                 '^=': lambda attr, value: attr.startswith(value),
1530                 '$=': lambda attr, value: attr.endswith(value),
1531                 '*=': lambda attr, value: value in attr,
1532             }
1533             str_operator_rex = re.compile(r'''(?x)\s*
1534                 (?P<key>[a-zA-Z0-9._-]+)\s*
1535                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1536                 (?P<value>[a-zA-Z0-9._-]+)\s*
1537                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1538             m = str_operator_rex.fullmatch(filter_spec)
1539             if m:
1540                 comparison_value = m.group('value')
1541                 str_op = STR_OPERATORS[m.group('op')]
1542                 if m.group('negation'):
1543                     op = lambda attr, value: not str_op(attr, value)
1544                 else:
1545                     op = str_op
1546
1547         if not m:
1548             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1549
1550         def _filter(f):
1551             actual_value = f.get(m.group('key'))
1552             if actual_value is None:
1553                 return m.group('none_inclusive')
1554             return op(actual_value, comparison_value)
1555         return _filter
1556
1557     def _default_format_spec(self, info_dict, download=True):
1558
1559         def can_merge():
1560             merger = FFmpegMergerPP(self)
1561             return merger.available and merger.can_merge()
1562
1563         prefer_best = (
1564             not self.params.get('simulate', False)
1565             and download
1566             and (
1567                 not can_merge()
1568                 or info_dict.get('is_live', False)
1569                 or self.outtmpl_dict['default'] == '-'))
1570         compat = (
1571             prefer_best
1572             or self.params.get('allow_multiple_audio_streams', False)
1573             or 'format-spec' in self.params.get('compat_opts', []))
1574
1575         return (
1576             'best/bestvideo+bestaudio' if prefer_best
1577             else 'bestvideo*+bestaudio/best' if not compat
1578             else 'bestvideo+bestaudio/best')
1579
1580     def build_format_selector(self, format_spec):
1581         def syntax_error(note, start):
1582             message = (
1583                 'Invalid format specification: '
1584                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1585             return SyntaxError(message)
1586
1587         PICKFIRST = 'PICKFIRST'
1588         MERGE = 'MERGE'
1589         SINGLE = 'SINGLE'
1590         GROUP = 'GROUP'
1591         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1592
1593         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1594                                   'video': self.params.get('allow_multiple_video_streams', False)}
1595
1596         check_formats = self.params.get('check_formats')
1597
1598         def _parse_filter(tokens):
1599             filter_parts = []
1600             for type, string, start, _, _ in tokens:
1601                 if type == tokenize.OP and string == ']':
1602                     return ''.join(filter_parts)
1603                 else:
1604                     filter_parts.append(string)
1605
1606         def _remove_unused_ops(tokens):
1607             # Remove operators that we don't use and join them with the surrounding strings
1608             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1609             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1610             last_string, last_start, last_end, last_line = None, None, None, None
1611             for type, string, start, end, line in tokens:
1612                 if type == tokenize.OP and string == '[':
1613                     if last_string:
1614                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1615                         last_string = None
1616                     yield type, string, start, end, line
1617                     # everything inside brackets will be handled by _parse_filter
1618                     for type, string, start, end, line in tokens:
1619                         yield type, string, start, end, line
1620                         if type == tokenize.OP and string == ']':
1621                             break
1622                 elif type == tokenize.OP and string in ALLOWED_OPS:
1623                     if last_string:
1624                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1625                         last_string = None
1626                     yield type, string, start, end, line
1627                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1628                     if not last_string:
1629                         last_string = string
1630                         last_start = start
1631                         last_end = end
1632                     else:
1633                         last_string += string
1634             if last_string:
1635                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1636
1637         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1638             selectors = []
1639             current_selector = None
1640             for type, string, start, _, _ in tokens:
1641                 # ENCODING is only defined in python 3.x
1642                 if type == getattr(tokenize, 'ENCODING', None):
1643                     continue
1644                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1645                     current_selector = FormatSelector(SINGLE, string, [])
1646                 elif type == tokenize.OP:
1647                     if string == ')':
1648                         if not inside_group:
1649                             # ')' will be handled by the parentheses group
1650                             tokens.restore_last_token()
1651                         break
1652                     elif inside_merge and string in ['/', ',']:
1653                         tokens.restore_last_token()
1654                         break
1655                     elif inside_choice and string == ',':
1656                         tokens.restore_last_token()
1657                         break
1658                     elif string == ',':
1659                         if not current_selector:
1660                             raise syntax_error('"," must follow a format selector', start)
1661                         selectors.append(current_selector)
1662                         current_selector = None
1663                     elif string == '/':
1664                         if not current_selector:
1665                             raise syntax_error('"/" must follow a format selector', start)
1666                         first_choice = current_selector
1667                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1668                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1669                     elif string == '[':
1670                         if not current_selector:
1671                             current_selector = FormatSelector(SINGLE, 'best', [])
1672                         format_filter = _parse_filter(tokens)
1673                         current_selector.filters.append(format_filter)
1674                     elif string == '(':
1675                         if current_selector:
1676                             raise syntax_error('Unexpected "("', start)
1677                         group = _parse_format_selection(tokens, inside_group=True)
1678                         current_selector = FormatSelector(GROUP, group, [])
1679                     elif string == '+':
1680                         if not current_selector:
1681                             raise syntax_error('Unexpected "+"', start)
1682                         selector_1 = current_selector
1683                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1684                         if not selector_2:
1685                             raise syntax_error('Expected a selector', start)
1686                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1687                     else:
1688                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1689                 elif type == tokenize.ENDMARKER:
1690                     break
1691             if current_selector:
1692                 selectors.append(current_selector)
1693             return selectors
1694
1695         def _merge(formats_pair):
1696             format_1, format_2 = formats_pair
1697
1698             formats_info = []
1699             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1700             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1701
1702             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1703                 get_no_more = {'video': False, 'audio': False}
1704                 for (i, fmt_info) in enumerate(formats_info):
1705                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1706                         formats_info.pop(i)
1707                         continue
1708                     for aud_vid in ['audio', 'video']:
1709                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1710                             if get_no_more[aud_vid]:
1711                                 formats_info.pop(i)
1712                             get_no_more[aud_vid] = True
1713
1714             if len(formats_info) == 1:
1715                 return formats_info[0]
1716
1717             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1718             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1719
1720             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1721             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1722
1723             output_ext = self.params.get('merge_output_format')
1724             if not output_ext:
1725                 if the_only_video:
1726                     output_ext = the_only_video['ext']
1727                 elif the_only_audio and not video_fmts:
1728                     output_ext = the_only_audio['ext']
1729                 else:
1730                     output_ext = 'mkv'
1731
1732             new_dict = {
1733                 'requested_formats': formats_info,
1734                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1735                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1736                 'ext': output_ext,
1737             }
1738
1739             if the_only_video:
1740                 new_dict.update({
1741                     'width': the_only_video.get('width'),
1742                     'height': the_only_video.get('height'),
1743                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1744                     'fps': the_only_video.get('fps'),
1745                     'vcodec': the_only_video.get('vcodec'),
1746                     'vbr': the_only_video.get('vbr'),
1747                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1748                 })
1749
1750             if the_only_audio:
1751                 new_dict.update({
1752                     'acodec': the_only_audio.get('acodec'),
1753                     'abr': the_only_audio.get('abr'),
1754                 })
1755
1756             return new_dict
1757
1758         def _check_formats(formats):
1759             if not check_formats:
1760                 yield from formats
1761                 return
1762             for f in formats:
1763                 self.to_screen('[info] Testing format %s' % f['format_id'])
1764                 temp_file = tempfile.NamedTemporaryFile(
1765                     suffix='.tmp', delete=False,
1766                     dir=self.get_output_path('temp') or None)
1767                 temp_file.close()
1768                 try:
1769                     success, _ = self.dl(temp_file.name, f, test=True)
1770                 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1771                     success = False
1772                 finally:
1773                     if os.path.exists(temp_file.name):
1774                         try:
1775                             os.remove(temp_file.name)
1776                         except OSError:
1777                             self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1778                 if success:
1779                     yield f
1780                 else:
1781                     self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1782
1783         def _build_selector_function(selector):
1784             if isinstance(selector, list):  # ,
1785                 fs = [_build_selector_function(s) for s in selector]
1786
1787                 def selector_function(ctx):
1788                     for f in fs:
1789                         yield from f(ctx)
1790                 return selector_function
1791
1792             elif selector.type == GROUP:  # ()
1793                 selector_function = _build_selector_function(selector.selector)
1794
1795             elif selector.type == PICKFIRST:  # /
1796                 fs = [_build_selector_function(s) for s in selector.selector]
1797
1798                 def selector_function(ctx):
1799                     for f in fs:
1800                         picked_formats = list(f(ctx))
1801                         if picked_formats:
1802                             return picked_formats
1803                     return []
1804
1805             elif selector.type == MERGE:  # +
1806                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1807
1808                 def selector_function(ctx):
1809                     for pair in itertools.product(
1810                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1811                         yield _merge(pair)
1812
1813             elif selector.type == SINGLE:  # atom
1814                 format_spec = selector.selector or 'best'
1815
1816                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1817                 if format_spec == 'all':
1818                     def selector_function(ctx):
1819                         yield from _check_formats(ctx['formats'])
1820                 elif format_spec == 'mergeall':
1821                     def selector_function(ctx):
1822                         formats = list(_check_formats(ctx['formats']))
1823                         if not formats:
1824                             return
1825                         merged_format = formats[-1]
1826                         for f in formats[-2::-1]:
1827                             merged_format = _merge((merged_format, f))
1828                         yield merged_format
1829
1830                 else:
1831                     format_fallback, format_reverse, format_idx = False, True, 1
1832                     mobj = re.match(
1833                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1834                         format_spec)
1835                     if mobj is not None:
1836                         format_idx = int_or_none(mobj.group('n'), default=1)
1837                         format_reverse = mobj.group('bw')[0] == 'b'
1838                         format_type = (mobj.group('type') or [None])[0]
1839                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1840                         format_modified = mobj.group('mod') is not None
1841
1842                         format_fallback = not format_type and not format_modified  # for b, w
1843                         _filter_f = (
1844                             (lambda f: f.get('%scodec' % format_type) != 'none')
1845                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1846                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1847                             if format_type  # bv, ba, wv, wa
1848                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1849                             if not format_modified  # b, w
1850                             else lambda f: True)  # b*, w*
1851                         filter_f = lambda f: _filter_f(f) and (
1852                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
1853                     else:
1854                         filter_f = ((lambda f: f.get('ext') == format_spec)
1855                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1856                                     else (lambda f: f.get('format_id') == format_spec))  # id
1857
1858                     def selector_function(ctx):
1859                         formats = list(ctx['formats'])
1860                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1861                         if format_fallback and ctx['incomplete_formats'] and not matches:
1862                             # for extractors with incomplete formats (audio only (soundcloud)
1863                             # or video only (imgur)) best/worst will fallback to
1864                             # best/worst {video,audio}-only format
1865                             matches = formats
1866                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
1867                         try:
1868                             yield matches[format_idx - 1]
1869                         except IndexError:
1870                             return
1871
1872             filters = [self._build_format_filter(f) for f in selector.filters]
1873
1874             def final_selector(ctx):
1875                 ctx_copy = copy.deepcopy(ctx)
1876                 for _filter in filters:
1877                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1878                 return selector_function(ctx_copy)
1879             return final_selector
1880
1881         stream = io.BytesIO(format_spec.encode('utf-8'))
1882         try:
1883             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1884         except tokenize.TokenError:
1885             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1886
1887         class TokenIterator(object):
1888             def __init__(self, tokens):
1889                 self.tokens = tokens
1890                 self.counter = 0
1891
1892             def __iter__(self):
1893                 return self
1894
1895             def __next__(self):
1896                 if self.counter >= len(self.tokens):
1897                     raise StopIteration()
1898                 value = self.tokens[self.counter]
1899                 self.counter += 1
1900                 return value
1901
1902             next = __next__
1903
1904             def restore_last_token(self):
1905                 self.counter -= 1
1906
1907         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1908         return _build_selector_function(parsed_selector)
1909
1910     def _calc_headers(self, info_dict):
1911         res = std_headers.copy()
1912
1913         add_headers = info_dict.get('http_headers')
1914         if add_headers:
1915             res.update(add_headers)
1916
1917         cookies = self._calc_cookies(info_dict)
1918         if cookies:
1919             res['Cookie'] = cookies
1920
1921         if 'X-Forwarded-For' not in res:
1922             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1923             if x_forwarded_for_ip:
1924                 res['X-Forwarded-For'] = x_forwarded_for_ip
1925
1926         return res
1927
1928     def _calc_cookies(self, info_dict):
1929         pr = sanitized_Request(info_dict['url'])
1930         self.cookiejar.add_cookie_header(pr)
1931         return pr.get_header('Cookie')
1932
1933     def _sanitize_thumbnails(self, info_dict):
1934         thumbnails = info_dict.get('thumbnails')
1935         if thumbnails is None:
1936             thumbnail = info_dict.get('thumbnail')
1937             if thumbnail:
1938                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1939         if thumbnails:
1940             thumbnails.sort(key=lambda t: (
1941                 t.get('preference') if t.get('preference') is not None else -1,
1942                 t.get('width') if t.get('width') is not None else -1,
1943                 t.get('height') if t.get('height') is not None else -1,
1944                 t.get('id') if t.get('id') is not None else '',
1945                 t.get('url')))
1946
1947             def test_thumbnail(t):
1948                 self.to_screen('[info] Testing thumbnail %s' % t['id'])
1949                 try:
1950                     self.urlopen(HEADRequest(t['url']))
1951                 except network_exceptions as err:
1952                     self.to_screen('[info] Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
1953                         t['id'], t['url'], error_to_compat_str(err)))
1954                     return False
1955                 return True
1956
1957             for i, t in enumerate(thumbnails):
1958                 if t.get('id') is None:
1959                     t['id'] = '%d' % i
1960                 if t.get('width') and t.get('height'):
1961                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1962                 t['url'] = sanitize_url(t['url'])
1963             if self.params.get('check_formats'):
1964                 info_dict['thumbnails'] = LazyList(filter(test_thumbnail, thumbnails[::-1])).reverse()
1965
1966     def process_video_result(self, info_dict, download=True):
1967         assert info_dict.get('_type', 'video') == 'video'
1968
1969         if 'id' not in info_dict:
1970             raise ExtractorError('Missing "id" field in extractor result')
1971         if 'title' not in info_dict:
1972             raise ExtractorError('Missing "title" field in extractor result')
1973
1974         def report_force_conversion(field, field_not, conversion):
1975             self.report_warning(
1976                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1977                 % (field, field_not, conversion))
1978
1979         def sanitize_string_field(info, string_field):
1980             field = info.get(string_field)
1981             if field is None or isinstance(field, compat_str):
1982                 return
1983             report_force_conversion(string_field, 'a string', 'string')
1984             info[string_field] = compat_str(field)
1985
1986         def sanitize_numeric_fields(info):
1987             for numeric_field in self._NUMERIC_FIELDS:
1988                 field = info.get(numeric_field)
1989                 if field is None or isinstance(field, compat_numeric_types):
1990                     continue
1991                 report_force_conversion(numeric_field, 'numeric', 'int')
1992                 info[numeric_field] = int_or_none(field)
1993
1994         sanitize_string_field(info_dict, 'id')
1995         sanitize_numeric_fields(info_dict)
1996
1997         if 'playlist' not in info_dict:
1998             # It isn't part of a playlist
1999             info_dict['playlist'] = None
2000             info_dict['playlist_index'] = None
2001
2002         self._sanitize_thumbnails(info_dict)
2003
2004         thumbnail = info_dict.get('thumbnail')
2005         thumbnails = info_dict.get('thumbnails')
2006         if thumbnail:
2007             info_dict['thumbnail'] = sanitize_url(thumbnail)
2008         elif thumbnails:
2009             info_dict['thumbnail'] = thumbnails[-1]['url']
2010
2011         if 'display_id' not in info_dict and 'id' in info_dict:
2012             info_dict['display_id'] = info_dict['id']
2013
2014         for ts_key, date_key in (
2015                 ('timestamp', 'upload_date'),
2016                 ('release_timestamp', 'release_date'),
2017         ):
2018             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2019                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2020                 # see http://bugs.python.org/issue1646728)
2021                 try:
2022                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2023                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2024                 except (ValueError, OverflowError, OSError):
2025                     pass
2026
2027         # Auto generate title fields corresponding to the *_number fields when missing
2028         # in order to always have clean titles. This is very common for TV series.
2029         for field in ('chapter', 'season', 'episode'):
2030             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2031                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2032
2033         for cc_kind in ('subtitles', 'automatic_captions'):
2034             cc = info_dict.get(cc_kind)
2035             if cc:
2036                 for _, subtitle in cc.items():
2037                     for subtitle_format in subtitle:
2038                         if subtitle_format.get('url'):
2039                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2040                         if subtitle_format.get('ext') is None:
2041                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2042
2043         automatic_captions = info_dict.get('automatic_captions')
2044         subtitles = info_dict.get('subtitles')
2045
2046         info_dict['requested_subtitles'] = self.process_subtitles(
2047             info_dict['id'], subtitles, automatic_captions)
2048
2049         # We now pick which formats have to be downloaded
2050         if info_dict.get('formats') is None:
2051             # There's only one format available
2052             formats = [info_dict]
2053         else:
2054             formats = info_dict['formats']
2055
2056         if not formats:
2057             if not self.params.get('ignore_no_formats_error'):
2058                 raise ExtractorError('No video formats found!')
2059             else:
2060                 self.report_warning('No video formats found!')
2061
2062         def is_wellformed(f):
2063             url = f.get('url')
2064             if not url:
2065                 self.report_warning(
2066                     '"url" field is missing or empty - skipping format, '
2067                     'there is an error in extractor')
2068                 return False
2069             if isinstance(url, bytes):
2070                 sanitize_string_field(f, 'url')
2071             return True
2072
2073         # Filter out malformed formats for better extraction robustness
2074         formats = list(filter(is_wellformed, formats))
2075
2076         formats_dict = {}
2077
2078         # We check that all the formats have the format and format_id fields
2079         for i, format in enumerate(formats):
2080             sanitize_string_field(format, 'format_id')
2081             sanitize_numeric_fields(format)
2082             format['url'] = sanitize_url(format['url'])
2083             if not format.get('format_id'):
2084                 format['format_id'] = compat_str(i)
2085             else:
2086                 # Sanitize format_id from characters used in format selector expression
2087                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2088             format_id = format['format_id']
2089             if format_id not in formats_dict:
2090                 formats_dict[format_id] = []
2091             formats_dict[format_id].append(format)
2092
2093         # Make sure all formats have unique format_id
2094         for format_id, ambiguous_formats in formats_dict.items():
2095             if len(ambiguous_formats) > 1:
2096                 for i, format in enumerate(ambiguous_formats):
2097                     format['format_id'] = '%s-%d' % (format_id, i)
2098
2099         for i, format in enumerate(formats):
2100             if format.get('format') is None:
2101                 format['format'] = '{id} - {res}{note}'.format(
2102                     id=format['format_id'],
2103                     res=self.format_resolution(format),
2104                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
2105                 )
2106             # Automatically determine file extension if missing
2107             if format.get('ext') is None:
2108                 format['ext'] = determine_ext(format['url']).lower()
2109             # Automatically determine protocol if missing (useful for format
2110             # selection purposes)
2111             if format.get('protocol') is None:
2112                 format['protocol'] = determine_protocol(format)
2113             # Add HTTP headers, so that external programs can use them from the
2114             # json output
2115             full_format_info = info_dict.copy()
2116             full_format_info.update(format)
2117             format['http_headers'] = self._calc_headers(full_format_info)
2118         # Remove private housekeeping stuff
2119         if '__x_forwarded_for_ip' in info_dict:
2120             del info_dict['__x_forwarded_for_ip']
2121
2122         # TODO Central sorting goes here
2123
2124         if formats and formats[0] is not info_dict:
2125             # only set the 'formats' fields if the original info_dict list them
2126             # otherwise we end up with a circular reference, the first (and unique)
2127             # element in the 'formats' field in info_dict is info_dict itself,
2128             # which can't be exported to json
2129             info_dict['formats'] = formats
2130
2131         info_dict, _ = self.pre_process(info_dict)
2132
2133         list_only = self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')
2134         if list_only:
2135             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2136             if self.params.get('list_thumbnails'):
2137                 self.list_thumbnails(info_dict)
2138             if self.params.get('listformats'):
2139                 if not info_dict.get('formats'):
2140                     raise ExtractorError('No video formats found', expected=True)
2141                 self.list_formats(info_dict)
2142             if self.params.get('listsubtitles'):
2143                 if 'automatic_captions' in info_dict:
2144                     self.list_subtitles(
2145                         info_dict['id'], automatic_captions, 'automatic captions')
2146                 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2147             return
2148
2149         format_selector = self.format_selector
2150         if format_selector is None:
2151             req_format = self._default_format_spec(info_dict, download=download)
2152             self.write_debug('Default format spec: %s' % req_format)
2153             format_selector = self.build_format_selector(req_format)
2154
2155         # While in format selection we may need to have an access to the original
2156         # format set in order to calculate some metrics or do some processing.
2157         # For now we need to be able to guess whether original formats provided
2158         # by extractor are incomplete or not (i.e. whether extractor provides only
2159         # video-only or audio-only formats) for proper formats selection for
2160         # extractors with such incomplete formats (see
2161         # https://github.com/ytdl-org/youtube-dl/pull/5556).
2162         # Since formats may be filtered during format selection and may not match
2163         # the original formats the results may be incorrect. Thus original formats
2164         # or pre-calculated metrics should be passed to format selection routines
2165         # as well.
2166         # We will pass a context object containing all necessary additional data
2167         # instead of just formats.
2168         # This fixes incorrect format selection issue (see
2169         # https://github.com/ytdl-org/youtube-dl/issues/10083).
2170         incomplete_formats = (
2171             # All formats are video-only or
2172             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2173             # all formats are audio-only
2174             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2175
2176         ctx = {
2177             'formats': formats,
2178             'incomplete_formats': incomplete_formats,
2179         }
2180
2181         formats_to_download = list(format_selector(ctx))
2182         if not formats_to_download:
2183             if not self.params.get('ignore_no_formats_error'):
2184                 raise ExtractorError('Requested format is not available', expected=True)
2185             else:
2186                 self.report_warning('Requested format is not available')
2187                 # Process what we can, even without any available formats.
2188                 self.process_info(dict(info_dict))
2189         elif download:
2190             self.to_screen(
2191                 '[info] %s: Downloading %d format(s): %s' % (
2192                     info_dict['id'], len(formats_to_download),
2193                     ", ".join([f['format_id'] for f in formats_to_download])))
2194             for fmt in formats_to_download:
2195                 new_info = dict(info_dict)
2196                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2197                 new_info['__original_infodict'] = info_dict
2198                 new_info.update(fmt)
2199                 self.process_info(new_info)
2200         # We update the info dict with the best quality format (backwards compatibility)
2201         if formats_to_download:
2202             info_dict.update(formats_to_download[-1])
2203         return info_dict
2204
2205     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2206         """Select the requested subtitles and their format"""
2207         available_subs = {}
2208         if normal_subtitles and self.params.get('writesubtitles'):
2209             available_subs.update(normal_subtitles)
2210         if automatic_captions and self.params.get('writeautomaticsub'):
2211             for lang, cap_info in automatic_captions.items():
2212                 if lang not in available_subs:
2213                     available_subs[lang] = cap_info
2214
2215         if (not self.params.get('writesubtitles') and not
2216                 self.params.get('writeautomaticsub') or not
2217                 available_subs):
2218             return None
2219
2220         all_sub_langs = available_subs.keys()
2221         if self.params.get('allsubtitles', False):
2222             requested_langs = all_sub_langs
2223         elif self.params.get('subtitleslangs', False):
2224             requested_langs = set()
2225             for lang in self.params.get('subtitleslangs'):
2226                 if lang == 'all':
2227                     requested_langs.update(all_sub_langs)
2228                     continue
2229                 discard = lang[0] == '-'
2230                 if discard:
2231                     lang = lang[1:]
2232                 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2233                 if discard:
2234                     for lang in current_langs:
2235                         requested_langs.discard(lang)
2236                 else:
2237                     requested_langs.update(current_langs)
2238         elif 'en' in available_subs:
2239             requested_langs = ['en']
2240         else:
2241             requested_langs = [list(all_sub_langs)[0]]
2242         self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2243
2244         formats_query = self.params.get('subtitlesformat', 'best')
2245         formats_preference = formats_query.split('/') if formats_query else []
2246         subs = {}
2247         for lang in requested_langs:
2248             formats = available_subs.get(lang)
2249             if formats is None:
2250                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2251                 continue
2252             for ext in formats_preference:
2253                 if ext == 'best':
2254                     f = formats[-1]
2255                     break
2256                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2257                 if matches:
2258                     f = matches[-1]
2259                     break
2260             else:
2261                 f = formats[-1]
2262                 self.report_warning(
2263                     'No subtitle format found matching "%s" for language %s, '
2264                     'using %s' % (formats_query, lang, f['ext']))
2265             subs[lang] = f
2266         return subs
2267
2268     def __forced_printings(self, info_dict, filename, incomplete):
2269         def print_mandatory(field, actual_field=None):
2270             if actual_field is None:
2271                 actual_field = field
2272             if (self.params.get('force%s' % field, False)
2273                     and (not incomplete or info_dict.get(actual_field) is not None)):
2274                 self.to_stdout(info_dict[actual_field])
2275
2276         def print_optional(field):
2277             if (self.params.get('force%s' % field, False)
2278                     and info_dict.get(field) is not None):
2279                 self.to_stdout(info_dict[field])
2280
2281         info_dict = info_dict.copy()
2282         if filename is not None:
2283             info_dict['filename'] = filename
2284         if info_dict.get('requested_formats') is not None:
2285             # For RTMP URLs, also include the playpath
2286             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2287         elif 'url' in info_dict:
2288             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2289
2290         for tmpl in self.params.get('forceprint', []):
2291             if re.match(r'\w+$', tmpl):
2292                 tmpl = '%({})s'.format(tmpl)
2293             tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2294             self.to_stdout(tmpl % info_copy)
2295
2296         print_mandatory('title')
2297         print_mandatory('id')
2298         print_mandatory('url', 'urls')
2299         print_optional('thumbnail')
2300         print_optional('description')
2301         print_optional('filename')
2302         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2303             self.to_stdout(formatSeconds(info_dict['duration']))
2304         print_mandatory('format')
2305
2306         if self.params.get('forcejson', False):
2307             self.post_extract(info_dict)
2308             self.to_stdout(json.dumps(info_dict, default=repr))
2309
2310     def dl(self, name, info, subtitle=False, test=False):
2311
2312         if test:
2313             verbose = self.params.get('verbose')
2314             params = {
2315                 'test': True,
2316                 'quiet': not verbose,
2317                 'verbose': verbose,
2318                 'noprogress': not verbose,
2319                 'nopart': True,
2320                 'skip_unavailable_fragments': False,
2321                 'keep_fragments': False,
2322                 'overwrites': True,
2323                 '_no_ytdl_file': True,
2324             }
2325         else:
2326             params = self.params
2327         fd = get_suitable_downloader(info, params)(self, params)
2328         if not test:
2329             for ph in self._progress_hooks:
2330                 fd.add_progress_hook(ph)
2331             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2332             self.write_debug('Invoking downloader on "%s"' % urls)
2333         new_info = dict(info)
2334         if new_info.get('http_headers') is None:
2335             new_info['http_headers'] = self._calc_headers(new_info)
2336         return fd.download(name, new_info, subtitle)
2337
2338     def process_info(self, info_dict):
2339         """Process a single resolved IE result."""
2340
2341         assert info_dict.get('_type', 'video') == 'video'
2342
2343         info_dict.setdefault('__postprocessors', [])
2344
2345         max_downloads = self.params.get('max_downloads')
2346         if max_downloads is not None:
2347             if self._num_downloads >= int(max_downloads):
2348                 raise MaxDownloadsReached()
2349
2350         # TODO: backward compatibility, to be removed
2351         info_dict['fulltitle'] = info_dict['title']
2352
2353         if 'format' not in info_dict and 'ext' in info_dict:
2354             info_dict['format'] = info_dict['ext']
2355
2356         if self._match_entry(info_dict) is not None:
2357             return
2358
2359         self.post_extract(info_dict)
2360         self._num_downloads += 1
2361
2362         # info_dict['_filename'] needs to be set for backward compatibility
2363         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2364         temp_filename = self.prepare_filename(info_dict, 'temp')
2365         files_to_move = {}
2366
2367         # Forced printings
2368         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2369
2370         if self.params.get('simulate', False):
2371             if self.params.get('force_write_download_archive', False):
2372                 self.record_download_archive(info_dict)
2373
2374             # Do nothing else if in simulate mode
2375             return
2376
2377         if full_filename is None:
2378             return
2379
2380         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2381             return
2382         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2383             return
2384
2385         if self.params.get('writedescription', False):
2386             descfn = self.prepare_filename(info_dict, 'description')
2387             if not self._ensure_dir_exists(encodeFilename(descfn)):
2388                 return
2389             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2390                 self.to_screen('[info] Video description is already present')
2391             elif info_dict.get('description') is None:
2392                 self.report_warning('There\'s no description to write.')
2393             else:
2394                 try:
2395                     self.to_screen('[info] Writing video description to: ' + descfn)
2396                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2397                         descfile.write(info_dict['description'])
2398                 except (OSError, IOError):
2399                     self.report_error('Cannot write description file ' + descfn)
2400                     return
2401
2402         if self.params.get('writeannotations', False):
2403             annofn = self.prepare_filename(info_dict, 'annotation')
2404             if not self._ensure_dir_exists(encodeFilename(annofn)):
2405                 return
2406             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2407                 self.to_screen('[info] Video annotations are already present')
2408             elif not info_dict.get('annotations'):
2409                 self.report_warning('There are no annotations to write.')
2410             else:
2411                 try:
2412                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2413                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2414                         annofile.write(info_dict['annotations'])
2415                 except (KeyError, TypeError):
2416                     self.report_warning('There are no annotations to write.')
2417                 except (OSError, IOError):
2418                     self.report_error('Cannot write annotations file: ' + annofn)
2419                     return
2420
2421         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2422                                        self.params.get('writeautomaticsub')])
2423
2424         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2425             # subtitles download errors are already managed as troubles in relevant IE
2426             # that way it will silently go on when used with unsupporting IE
2427             subtitles = info_dict['requested_subtitles']
2428             # ie = self.get_info_extractor(info_dict['extractor_key'])
2429             for sub_lang, sub_info in subtitles.items():
2430                 sub_format = sub_info['ext']
2431                 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2432                 sub_filename_final = subtitles_filename(
2433                     self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2434                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2435                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2436                     sub_info['filepath'] = sub_filename
2437                     files_to_move[sub_filename] = sub_filename_final
2438                 else:
2439                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2440                     if sub_info.get('data') is not None:
2441                         try:
2442                             # Use newline='' to prevent conversion of newline characters
2443                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2444                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2445                                 subfile.write(sub_info['data'])
2446                             sub_info['filepath'] = sub_filename
2447                             files_to_move[sub_filename] = sub_filename_final
2448                         except (OSError, IOError):
2449                             self.report_error('Cannot write subtitles file ' + sub_filename)
2450                             return
2451                     else:
2452                         try:
2453                             self.dl(sub_filename, sub_info.copy(), subtitle=True)
2454                             sub_info['filepath'] = sub_filename
2455                             files_to_move[sub_filename] = sub_filename_final
2456                         except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2457                             self.report_warning('Unable to download subtitle for "%s": %s' %
2458                                                 (sub_lang, error_to_compat_str(err)))
2459                             continue
2460
2461         if self.params.get('writeinfojson', False):
2462             infofn = self.prepare_filename(info_dict, 'infojson')
2463             if not self._ensure_dir_exists(encodeFilename(infofn)):
2464                 return
2465             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2466                 self.to_screen('[info] Video metadata is already present')
2467             else:
2468                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2469                 try:
2470                     write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2471                 except (OSError, IOError):
2472                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2473                     return
2474             info_dict['__infojson_filename'] = infofn
2475
2476         for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2477             thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2478             thumb_filename = replace_extension(
2479                 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2480             files_to_move[thumb_filename_temp] = thumb_filename
2481
2482         # Write internet shortcut files
2483         url_link = webloc_link = desktop_link = False
2484         if self.params.get('writelink', False):
2485             if sys.platform == "darwin":  # macOS.
2486                 webloc_link = True
2487             elif sys.platform.startswith("linux"):
2488                 desktop_link = True
2489             else:  # if sys.platform in ['win32', 'cygwin']:
2490                 url_link = True
2491         if self.params.get('writeurllink', False):
2492             url_link = True
2493         if self.params.get('writewebloclink', False):
2494             webloc_link = True
2495         if self.params.get('writedesktoplink', False):
2496             desktop_link = True
2497
2498         if url_link or webloc_link or desktop_link:
2499             if 'webpage_url' not in info_dict:
2500                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2501                 return
2502             ascii_url = iri_to_uri(info_dict['webpage_url'])
2503
2504         def _write_link_file(extension, template, newline, embed_filename):
2505             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2506             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2507                 self.to_screen('[info] Internet shortcut is already present')
2508             else:
2509                 try:
2510                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2511                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2512                         template_vars = {'url': ascii_url}
2513                         if embed_filename:
2514                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2515                         linkfile.write(template % template_vars)
2516                 except (OSError, IOError):
2517                     self.report_error('Cannot write internet shortcut ' + linkfn)
2518                     return False
2519             return True
2520
2521         if url_link:
2522             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2523                 return
2524         if webloc_link:
2525             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2526                 return
2527         if desktop_link:
2528             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2529                 return
2530
2531         try:
2532             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2533         except PostProcessingError as err:
2534             self.report_error('Preprocessing: %s' % str(err))
2535             return
2536
2537         must_record_download_archive = False
2538         if self.params.get('skip_download', False):
2539             info_dict['filepath'] = temp_filename
2540             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2541             info_dict['__files_to_move'] = files_to_move
2542             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2543         else:
2544             # Download
2545             try:
2546
2547                 def existing_file(*filepaths):
2548                     ext = info_dict.get('ext')
2549                     final_ext = self.params.get('final_ext', ext)
2550                     existing_files = []
2551                     for file in orderedSet(filepaths):
2552                         if final_ext != ext:
2553                             converted = replace_extension(file, final_ext, ext)
2554                             if os.path.exists(encodeFilename(converted)):
2555                                 existing_files.append(converted)
2556                         if os.path.exists(encodeFilename(file)):
2557                             existing_files.append(file)
2558
2559                     if not existing_files or self.params.get('overwrites', False):
2560                         for file in orderedSet(existing_files):
2561                             self.report_file_delete(file)
2562                             os.remove(encodeFilename(file))
2563                         return None
2564
2565                     self.report_file_already_downloaded(existing_files[0])
2566                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2567                     return existing_files[0]
2568
2569                 success = True
2570                 if info_dict.get('requested_formats') is not None:
2571
2572                     def compatible_formats(formats):
2573                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2574                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2575                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2576                         if len(video_formats) > 2 or len(audio_formats) > 2:
2577                             return False
2578
2579                         # Check extension
2580                         exts = set(format.get('ext') for format in formats)
2581                         COMPATIBLE_EXTS = (
2582                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2583                             set(('webm',)),
2584                         )
2585                         for ext_sets in COMPATIBLE_EXTS:
2586                             if ext_sets.issuperset(exts):
2587                                 return True
2588                         # TODO: Check acodec/vcodec
2589                         return False
2590
2591                     requested_formats = info_dict['requested_formats']
2592                     old_ext = info_dict['ext']
2593                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2594                         info_dict['ext'] = 'mkv'
2595                         self.report_warning(
2596                             'Requested formats are incompatible for merge and will be merged into mkv.')
2597
2598                     def correct_ext(filename):
2599                         filename_real_ext = os.path.splitext(filename)[1][1:]
2600                         filename_wo_ext = (
2601                             os.path.splitext(filename)[0]
2602                             if filename_real_ext == old_ext
2603                             else filename)
2604                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2605
2606                     # Ensure filename always has a correct extension for successful merge
2607                     full_filename = correct_ext(full_filename)
2608                     temp_filename = correct_ext(temp_filename)
2609                     dl_filename = existing_file(full_filename, temp_filename)
2610                     info_dict['__real_download'] = False
2611
2612                     _protocols = set(determine_protocol(f) for f in requested_formats)
2613                     if len(_protocols) == 1:
2614                         info_dict['protocol'] = _protocols.pop()
2615                     directly_mergable = (
2616                         'no-direct-merge' not in self.params.get('compat_opts', [])
2617                         and info_dict.get('protocol') is not None  # All requested formats have same protocol
2618                         and not self.params.get('allow_unplayable_formats')
2619                         and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2620                     if directly_mergable:
2621                         info_dict['url'] = requested_formats[0]['url']
2622                         # Treat it as a single download
2623                         dl_filename = existing_file(full_filename, temp_filename)
2624                         if dl_filename is None:
2625                             success, real_download = self.dl(temp_filename, info_dict)
2626                             info_dict['__real_download'] = real_download
2627                     else:
2628                         downloaded = []
2629                         merger = FFmpegMergerPP(self)
2630                         if self.params.get('allow_unplayable_formats'):
2631                             self.report_warning(
2632                                 'You have requested merging of multiple formats '
2633                                 'while also allowing unplayable formats to be downloaded. '
2634                                 'The formats won\'t be merged to prevent data corruption.')
2635                         elif not merger.available:
2636                             self.report_warning(
2637                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2638                                 'The formats won\'t be merged.')
2639
2640                         if dl_filename is None:
2641                             for f in requested_formats:
2642                                 new_info = dict(info_dict)
2643                                 del new_info['requested_formats']
2644                                 new_info.update(f)
2645                                 fname = prepend_extension(
2646                                     self.prepare_filename(new_info, 'temp'),
2647                                     'f%s' % f['format_id'], new_info['ext'])
2648                                 if not self._ensure_dir_exists(fname):
2649                                     return
2650                                 downloaded.append(fname)
2651                                 partial_success, real_download = self.dl(fname, new_info)
2652                                 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2653                                 success = success and partial_success
2654                             if merger.available and not self.params.get('allow_unplayable_formats'):
2655                                 info_dict['__postprocessors'].append(merger)
2656                                 info_dict['__files_to_merge'] = downloaded
2657                                 # Even if there were no downloads, it is being merged only now
2658                                 info_dict['__real_download'] = True
2659                             else:
2660                                 for file in downloaded:
2661                                     files_to_move[file] = None
2662                 else:
2663                     # Just a single file
2664                     dl_filename = existing_file(full_filename, temp_filename)
2665                     if dl_filename is None:
2666                         success, real_download = self.dl(temp_filename, info_dict)
2667                         info_dict['__real_download'] = real_download
2668
2669                 dl_filename = dl_filename or temp_filename
2670                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2671
2672             except network_exceptions as err:
2673                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2674                 return
2675             except (OSError, IOError) as err:
2676                 raise UnavailableVideoError(err)
2677             except (ContentTooShortError, ) as err:
2678                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2679                 return
2680
2681             if success and full_filename != '-':
2682
2683                 def fixup():
2684                     do_fixup = True
2685                     fixup_policy = self.params.get('fixup')
2686                     vid = info_dict['id']
2687
2688                     if fixup_policy in ('ignore', 'never'):
2689                         return
2690                     elif fixup_policy == 'warn':
2691                         do_fixup = False
2692                     elif fixup_policy != 'force':
2693                         assert fixup_policy in ('detect_or_warn', None)
2694                         if not info_dict.get('__real_download'):
2695                             do_fixup = False
2696
2697                     def ffmpeg_fixup(cndn, msg, cls):
2698                         if not cndn:
2699                             return
2700                         if not do_fixup:
2701                             self.report_warning(f'{vid}: {msg}')
2702                             return
2703                         pp = cls(self)
2704                         if pp.available:
2705                             info_dict['__postprocessors'].append(pp)
2706                         else:
2707                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2708
2709                     stretched_ratio = info_dict.get('stretched_ratio')
2710                     ffmpeg_fixup(
2711                         stretched_ratio not in (1, None),
2712                         f'Non-uniform pixel ratio {stretched_ratio}',
2713                         FFmpegFixupStretchedPP)
2714
2715                     ffmpeg_fixup(
2716                         (info_dict.get('requested_formats') is None
2717                          and info_dict.get('container') == 'm4a_dash'
2718                          and info_dict.get('ext') == 'm4a'),
2719                         'writing DASH m4a. Only some players support this container',
2720                         FFmpegFixupM4aPP)
2721
2722                     downloader = (get_suitable_downloader(info_dict, self.params).__name__
2723                                   if 'protocol' in info_dict else None)
2724                     ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2725                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2726                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2727
2728                 fixup()
2729                 try:
2730                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2731                 except PostProcessingError as err:
2732                     self.report_error('Postprocessing: %s' % str(err))
2733                     return
2734                 try:
2735                     for ph in self._post_hooks:
2736                         ph(info_dict['filepath'])
2737                 except Exception as err:
2738                     self.report_error('post hooks: %s' % str(err))
2739                     return
2740                 must_record_download_archive = True
2741
2742         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2743             self.record_download_archive(info_dict)
2744         max_downloads = self.params.get('max_downloads')
2745         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2746             raise MaxDownloadsReached()
2747
2748     def download(self, url_list):
2749         """Download a given list of URLs."""
2750         outtmpl = self.outtmpl_dict['default']
2751         if (len(url_list) > 1
2752                 and outtmpl != '-'
2753                 and '%' not in outtmpl
2754                 and self.params.get('max_downloads') != 1):
2755             raise SameFileError(outtmpl)
2756
2757         for url in url_list:
2758             try:
2759                 # It also downloads the videos
2760                 res = self.extract_info(
2761                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2762             except UnavailableVideoError:
2763                 self.report_error('unable to download video')
2764             except MaxDownloadsReached:
2765                 self.to_screen('[info] Maximum number of downloaded files reached')
2766                 raise
2767             except ExistingVideoReached:
2768                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2769                 raise
2770             except RejectedVideoReached:
2771                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2772                 raise
2773             else:
2774                 if self.params.get('dump_single_json', False):
2775                     self.post_extract(res)
2776                     self.to_stdout(json.dumps(res, default=repr))
2777
2778         return self._download_retcode
2779
2780     def download_with_info_file(self, info_filename):
2781         with contextlib.closing(fileinput.FileInput(
2782                 [info_filename], mode='r',
2783                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2784             # FileInput doesn't have a read method, we can't call json.load
2785             info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2786         try:
2787             self.process_ie_result(info, download=True)
2788         except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2789             webpage_url = info.get('webpage_url')
2790             if webpage_url is not None:
2791                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2792                 return self.download([webpage_url])
2793             else:
2794                 raise
2795         return self._download_retcode
2796
2797     @staticmethod
2798     def filter_requested_info(info_dict, actually_filter=True):
2799         remove_keys = ['__original_infodict']  # Always remove this since this may contain a copy of the entire dict
2800         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
2801         if actually_filter:
2802             remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2803             empty_values = (None, {}, [], set(), tuple())
2804             reject = lambda k, v: k not in keep_keys and (
2805                 k.startswith('_') or k in remove_keys or v in empty_values)
2806         else:
2807             info_dict['epoch'] = int(time.time())
2808             reject = lambda k, v: k in remove_keys
2809         filter_fn = lambda obj: (
2810             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2811             else obj if not isinstance(obj, dict)
2812             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2813         return filter_fn(info_dict)
2814
2815     def run_pp(self, pp, infodict):
2816         files_to_delete = []
2817         if '__files_to_move' not in infodict:
2818             infodict['__files_to_move'] = {}
2819         files_to_delete, infodict = pp.run(infodict)
2820         if not files_to_delete:
2821             return infodict
2822
2823         if self.params.get('keepvideo', False):
2824             for f in files_to_delete:
2825                 infodict['__files_to_move'].setdefault(f, '')
2826         else:
2827             for old_filename in set(files_to_delete):
2828                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2829                 try:
2830                     os.remove(encodeFilename(old_filename))
2831                 except (IOError, OSError):
2832                     self.report_warning('Unable to remove downloaded original file')
2833                 if old_filename in infodict['__files_to_move']:
2834                     del infodict['__files_to_move'][old_filename]
2835         return infodict
2836
2837     @staticmethod
2838     def post_extract(info_dict):
2839         def actual_post_extract(info_dict):
2840             if info_dict.get('_type') in ('playlist', 'multi_video'):
2841                 for video_dict in info_dict.get('entries', {}):
2842                     actual_post_extract(video_dict or {})
2843                 return
2844
2845             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2846             extra = post_extractor().items()
2847             info_dict.update(extra)
2848             info_dict.pop('__post_extractor', None)
2849
2850             original_infodict = info_dict.get('__original_infodict') or {}
2851             original_infodict.update(extra)
2852             original_infodict.pop('__post_extractor', None)
2853
2854         actual_post_extract(info_dict or {})
2855
2856     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2857         info = dict(ie_info)
2858         info['__files_to_move'] = files_to_move or {}
2859         for pp in self._pps[key]:
2860             info = self.run_pp(pp, info)
2861         return info, info.pop('__files_to_move', None)
2862
2863     def post_process(self, filename, ie_info, files_to_move=None):
2864         """Run all the postprocessors on the given file."""
2865         info = dict(ie_info)
2866         info['filepath'] = filename
2867         info['__files_to_move'] = files_to_move or {}
2868
2869         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2870             info = self.run_pp(pp, info)
2871         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2872         del info['__files_to_move']
2873         for pp in self._pps['after_move']:
2874             info = self.run_pp(pp, info)
2875         return info
2876
2877     def _make_archive_id(self, info_dict):
2878         video_id = info_dict.get('id')
2879         if not video_id:
2880             return
2881         # Future-proof against any change in case
2882         # and backwards compatibility with prior versions
2883         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2884         if extractor is None:
2885             url = str_or_none(info_dict.get('url'))
2886             if not url:
2887                 return
2888             # Try to find matching extractor for the URL and take its ie_key
2889             for ie in self._ies:
2890                 if ie.suitable(url):
2891                     extractor = ie.ie_key()
2892                     break
2893             else:
2894                 return
2895         return '%s %s' % (extractor.lower(), video_id)
2896
2897     def in_download_archive(self, info_dict):
2898         fn = self.params.get('download_archive')
2899         if fn is None:
2900             return False
2901
2902         vid_id = self._make_archive_id(info_dict)
2903         if not vid_id:
2904             return False  # Incomplete video information
2905
2906         return vid_id in self.archive
2907
2908     def record_download_archive(self, info_dict):
2909         fn = self.params.get('download_archive')
2910         if fn is None:
2911             return
2912         vid_id = self._make_archive_id(info_dict)
2913         assert vid_id
2914         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2915             archive_file.write(vid_id + '\n')
2916         self.archive.add(vid_id)
2917
2918     @staticmethod
2919     def format_resolution(format, default='unknown'):
2920         if format.get('vcodec') == 'none':
2921             if format.get('acodec') == 'none':
2922                 return 'images'
2923             return 'audio only'
2924         if format.get('resolution') is not None:
2925             return format['resolution']
2926         if format.get('width') and format.get('height'):
2927             res = '%dx%d' % (format['width'], format['height'])
2928         elif format.get('height'):
2929             res = '%sp' % format['height']
2930         elif format.get('width'):
2931             res = '%dx?' % format['width']
2932         else:
2933             res = default
2934         return res
2935
2936     def _format_note(self, fdict):
2937         res = ''
2938         if fdict.get('ext') in ['f4f', 'f4m']:
2939             res += '(unsupported) '
2940         if fdict.get('language'):
2941             if res:
2942                 res += ' '
2943             res += '[%s] ' % fdict['language']
2944         if fdict.get('format_note') is not None:
2945             res += fdict['format_note'] + ' '
2946         if fdict.get('tbr') is not None:
2947             res += '%4dk ' % fdict['tbr']
2948         if fdict.get('container') is not None:
2949             if res:
2950                 res += ', '
2951             res += '%s container' % fdict['container']
2952         if (fdict.get('vcodec') is not None
2953                 and fdict.get('vcodec') != 'none'):
2954             if res:
2955                 res += ', '
2956             res += fdict['vcodec']
2957             if fdict.get('vbr') is not None:
2958                 res += '@'
2959         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2960             res += 'video@'
2961         if fdict.get('vbr') is not None:
2962             res += '%4dk' % fdict['vbr']
2963         if fdict.get('fps') is not None:
2964             if res:
2965                 res += ', '
2966             res += '%sfps' % fdict['fps']
2967         if fdict.get('acodec') is not None:
2968             if res:
2969                 res += ', '
2970             if fdict['acodec'] == 'none':
2971                 res += 'video only'
2972             else:
2973                 res += '%-5s' % fdict['acodec']
2974         elif fdict.get('abr') is not None:
2975             if res:
2976                 res += ', '
2977             res += 'audio'
2978         if fdict.get('abr') is not None:
2979             res += '@%3dk' % fdict['abr']
2980         if fdict.get('asr') is not None:
2981             res += ' (%5dHz)' % fdict['asr']
2982         if fdict.get('filesize') is not None:
2983             if res:
2984                 res += ', '
2985             res += format_bytes(fdict['filesize'])
2986         elif fdict.get('filesize_approx') is not None:
2987             if res:
2988                 res += ', '
2989             res += '~' + format_bytes(fdict['filesize_approx'])
2990         return res
2991
2992     def _format_note_table(self, f):
2993         def join_fields(*vargs):
2994             return ', '.join((val for val in vargs if val != ''))
2995
2996         return join_fields(
2997             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2998             format_field(f, 'language', '[%s]'),
2999             format_field(f, 'format_note'),
3000             format_field(f, 'container', ignore=(None, f.get('ext'))),
3001             format_field(f, 'asr', '%5dHz'))
3002
3003     def list_formats(self, info_dict):
3004         formats = info_dict.get('formats', [info_dict])
3005         new_format = (
3006             'list-formats' not in self.params.get('compat_opts', [])
3007             and self.params.get('listformats_table', True) is not False)
3008         if new_format:
3009             table = [
3010                 [
3011                     format_field(f, 'format_id'),
3012                     format_field(f, 'ext'),
3013                     self.format_resolution(f),
3014                     format_field(f, 'fps', '%d'),
3015                     '|',
3016                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3017                     format_field(f, 'tbr', '%4dk'),
3018                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3019                     '|',
3020                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3021                     format_field(f, 'vbr', '%4dk'),
3022                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3023                     format_field(f, 'abr', '%3dk'),
3024                     format_field(f, 'asr', '%5dHz'),
3025                     self._format_note_table(f)]
3026                 for f in formats
3027                 if f.get('preference') is None or f['preference'] >= -1000]
3028             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
3029                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
3030         else:
3031             table = [
3032                 [
3033                     format_field(f, 'format_id'),
3034                     format_field(f, 'ext'),
3035                     self.format_resolution(f),
3036                     self._format_note(f)]
3037                 for f in formats
3038                 if f.get('preference') is None or f['preference'] >= -1000]
3039             header_line = ['format code', 'extension', 'resolution', 'note']
3040
3041         self.to_screen(
3042             '[info] Available formats for %s:' % info_dict['id'])
3043         self.to_stdout(render_table(
3044             header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3045
3046     def list_thumbnails(self, info_dict):
3047         thumbnails = list(info_dict.get('thumbnails'))
3048         if not thumbnails:
3049             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3050             return
3051
3052         self.to_screen(
3053             '[info] Thumbnails for %s:' % info_dict['id'])
3054         self.to_stdout(render_table(
3055             ['ID', 'width', 'height', 'URL'],
3056             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3057
3058     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3059         if not subtitles:
3060             self.to_screen('%s has no %s' % (video_id, name))
3061             return
3062         self.to_screen(
3063             'Available %s for %s:' % (name, video_id))
3064
3065         def _row(lang, formats):
3066             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3067             if len(set(names)) == 1:
3068                 names = [] if names[0] == 'unknown' else names[:1]
3069             return [lang, ', '.join(names), ', '.join(exts)]
3070
3071         self.to_stdout(render_table(
3072             ['Language', 'Name', 'Formats'],
3073             [_row(lang, formats) for lang, formats in subtitles.items()],
3074             hideEmpty=True))
3075
3076     def urlopen(self, req):
3077         """ Start an HTTP download """
3078         if isinstance(req, compat_basestring):
3079             req = sanitized_Request(req)
3080         return self._opener.open(req, timeout=self._socket_timeout)
3081
3082     def print_debug_header(self):
3083         if not self.params.get('verbose'):
3084             return
3085
3086         if type('') is not compat_str:
3087             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
3088             self.report_warning(
3089                 'Your Python is broken! Update to a newer and supported version')
3090
3091         stdout_encoding = getattr(
3092             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
3093         encoding_str = (
3094             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3095                 locale.getpreferredencoding(),
3096                 sys.getfilesystemencoding(),
3097                 stdout_encoding,
3098                 self.get_encoding()))
3099         write_string(encoding_str, encoding=None)
3100
3101         source = (
3102             '(exe)' if hasattr(sys, 'frozen')
3103             else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
3104             else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
3105             else '')
3106         self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
3107         if _LAZY_LOADER:
3108             self._write_string('[debug] Lazy loading extractors enabled\n')
3109         if _PLUGIN_CLASSES:
3110             self._write_string(
3111                 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
3112         if self.params.get('compat_opts'):
3113             self._write_string(
3114                 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3115         try:
3116             sp = subprocess.Popen(
3117                 ['git', 'rev-parse', '--short', 'HEAD'],
3118                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3119                 cwd=os.path.dirname(os.path.abspath(__file__)))
3120             out, err = process_communicate_or_kill(sp)
3121             out = out.decode().strip()
3122             if re.match('[0-9a-f]+', out):
3123                 self._write_string('[debug] Git HEAD: %s\n' % out)
3124         except Exception:
3125             try:
3126                 sys.exc_clear()
3127             except Exception:
3128                 pass
3129
3130         def python_implementation():
3131             impl_name = platform.python_implementation()
3132             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3133                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3134             return impl_name
3135
3136         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3137             platform.python_version(),
3138             python_implementation(),
3139             platform.architecture()[0],
3140             platform_name()))
3141
3142         exe_versions = FFmpegPostProcessor.get_versions(self)
3143         exe_versions['rtmpdump'] = rtmpdump_version()
3144         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3145         exe_str = ', '.join(
3146             '%s %s' % (exe, v)
3147             for exe, v in sorted(exe_versions.items())
3148             if v
3149         )
3150         if not exe_str:
3151             exe_str = 'none'
3152         self._write_string('[debug] exe versions: %s\n' % exe_str)
3153
3154         proxy_map = {}
3155         for handler in self._opener.handlers:
3156             if hasattr(handler, 'proxies'):
3157                 proxy_map.update(handler.proxies)
3158         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
3159
3160         if self.params.get('call_home', False):
3161             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3162             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
3163             return
3164             latest_version = self.urlopen(
3165                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3166             if version_tuple(latest_version) > version_tuple(__version__):
3167                 self.report_warning(
3168                     'You are using an outdated version (newest version: %s)! '
3169                     'See https://yt-dl.org/update if you need help updating.' %
3170                     latest_version)
3171
3172     def _setup_opener(self):
3173         timeout_val = self.params.get('socket_timeout')
3174         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3175
3176         opts_cookiefile = self.params.get('cookiefile')
3177         opts_proxy = self.params.get('proxy')
3178
3179         if opts_cookiefile is None:
3180             self.cookiejar = compat_cookiejar.CookieJar()
3181         else:
3182             opts_cookiefile = expand_path(opts_cookiefile)
3183             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
3184             if os.access(opts_cookiefile, os.R_OK):
3185                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
3186
3187         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3188         if opts_proxy is not None:
3189             if opts_proxy == '':
3190                 proxies = {}
3191             else:
3192                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3193         else:
3194             proxies = compat_urllib_request.getproxies()
3195             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3196             if 'http' in proxies and 'https' not in proxies:
3197                 proxies['https'] = proxies['http']
3198         proxy_handler = PerRequestProxyHandler(proxies)
3199
3200         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3201         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3202         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3203         redirect_handler = YoutubeDLRedirectHandler()
3204         data_handler = compat_urllib_request_DataHandler()
3205
3206         # When passing our own FileHandler instance, build_opener won't add the
3207         # default FileHandler and allows us to disable the file protocol, which
3208         # can be used for malicious purposes (see
3209         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3210         file_handler = compat_urllib_request.FileHandler()
3211
3212         def file_open(*args, **kwargs):
3213             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3214         file_handler.file_open = file_open
3215
3216         opener = compat_urllib_request.build_opener(
3217             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3218
3219         # Delete the default user-agent header, which would otherwise apply in
3220         # cases where our custom HTTP handler doesn't come into play
3221         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3222         opener.addheaders = []
3223         self._opener = opener
3224
3225     def encode(self, s):
3226         if isinstance(s, bytes):
3227             return s  # Already encoded
3228
3229         try:
3230             return s.encode(self.get_encoding())
3231         except UnicodeEncodeError as err:
3232             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3233             raise
3234
3235     def get_encoding(self):
3236         encoding = self.params.get('encoding')
3237         if encoding is None:
3238             encoding = preferredencoding()
3239         return encoding
3240
3241     def _write_thumbnails(self, info_dict, filename):  # return the extensions
3242         write_all = self.params.get('write_all_thumbnails', False)
3243         thumbnails = []
3244         if write_all or self.params.get('writethumbnail', False):
3245             thumbnails = info_dict.get('thumbnails') or []
3246         multiple = write_all and len(thumbnails) > 1
3247
3248         ret = []
3249         for t in thumbnails[::-1]:
3250             thumb_ext = determine_ext(t['url'], 'jpg')
3251             suffix = '%s.' % t['id'] if multiple else ''
3252             thumb_display_id = '%s ' % t['id'] if multiple else ''
3253             thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3254
3255             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3256                 ret.append(suffix + thumb_ext)
3257                 t['filepath'] = thumb_filename
3258                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3259                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3260             else:
3261                 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3262                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3263                 try:
3264                     uf = self.urlopen(t['url'])
3265                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3266                         shutil.copyfileobj(uf, thumbf)
3267                     ret.append(suffix + thumb_ext)
3268                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3269                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3270                     t['filepath'] = thumb_filename
3271                 except network_exceptions as err:
3272                     self.report_warning('Unable to download thumbnail "%s": %s' %
3273                                         (t['url'], error_to_compat_str(err)))
3274             if ret and not write_all:
3275                 break
3276         return ret