]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
697b089a14a02f049df98a10f0758d69c6fc5d3d
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import tempfile
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30 from zipimport import zipimporter
31
32 from .compat import (
33 compat_basestring,
34 compat_get_terminal_size,
35 compat_kwargs,
36 compat_numeric_types,
37 compat_os_name,
38 compat_shlex_quote,
39 compat_str,
40 compat_tokenize_tokenize,
41 compat_urllib_error,
42 compat_urllib_request,
43 compat_urllib_request_DataHandler,
44 )
45 from .cookies import load_cookies
46 from .utils import (
47 age_restricted,
48 args_to_str,
49 ContentTooShortError,
50 date_from_str,
51 DateRange,
52 DEFAULT_OUTTMPL,
53 determine_ext,
54 determine_protocol,
55 DOT_DESKTOP_LINK_TEMPLATE,
56 DOT_URL_LINK_TEMPLATE,
57 DOT_WEBLOC_LINK_TEMPLATE,
58 DownloadError,
59 encode_compat_str,
60 encodeFilename,
61 EntryNotInPlaylist,
62 error_to_compat_str,
63 ExistingVideoReached,
64 expand_path,
65 ExtractorError,
66 float_or_none,
67 format_bytes,
68 format_field,
69 STR_FORMAT_RE_TMPL,
70 STR_FORMAT_TYPES,
71 formatSeconds,
72 GeoRestrictedError,
73 HEADRequest,
74 int_or_none,
75 iri_to_uri,
76 ISO3166Utils,
77 LazyList,
78 locked_file,
79 make_dir,
80 make_HTTPS_handler,
81 MaxDownloadsReached,
82 network_exceptions,
83 orderedSet,
84 OUTTMPL_TYPES,
85 PagedList,
86 parse_filesize,
87 PerRequestProxyHandler,
88 platform_name,
89 PostProcessingError,
90 preferredencoding,
91 prepend_extension,
92 process_communicate_or_kill,
93 register_socks_protocols,
94 RejectedVideoReached,
95 render_table,
96 replace_extension,
97 SameFileError,
98 sanitize_filename,
99 sanitize_path,
100 sanitize_url,
101 sanitized_Request,
102 std_headers,
103 str_or_none,
104 strftime_or_none,
105 subtitles_filename,
106 ThrottledDownload,
107 to_high_limit_path,
108 traverse_obj,
109 try_get,
110 UnavailableVideoError,
111 url_basename,
112 variadic,
113 version_tuple,
114 write_json_file,
115 write_string,
116 YoutubeDLCookieProcessor,
117 YoutubeDLHandler,
118 YoutubeDLRedirectHandler,
119 )
120 from .cache import Cache
121 from .extractor import (
122 gen_extractor_classes,
123 get_info_extractor,
124 _LAZY_LOADER,
125 _PLUGIN_CLASSES
126 )
127 from .extractor.openload import PhantomJSwrapper
128 from .downloader import (
129 get_suitable_downloader,
130 shorten_protocol_name
131 )
132 from .downloader.rtmp import rtmpdump_version
133 from .postprocessor import (
134 get_postprocessor,
135 FFmpegFixupDurationPP,
136 FFmpegFixupM3u8PP,
137 FFmpegFixupM4aPP,
138 FFmpegFixupStretchedPP,
139 FFmpegFixupTimestampPP,
140 FFmpegMergerPP,
141 FFmpegPostProcessor,
142 MoveFilesAfterDownloadPP,
143 )
144 from .version import __version__
145
146 if compat_os_name == 'nt':
147 import ctypes
148
149
150 class YoutubeDL(object):
151 """YoutubeDL class.
152
153 YoutubeDL objects are the ones responsible of downloading the
154 actual video file and writing it to disk if the user has requested
155 it, among some other tasks. In most cases there should be one per
156 program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do), it
158 has to pass the URL to one of them.
159
160 For this, YoutubeDL objects have a method that allows
161 InfoExtractors to be registered in a given order. When it is passed
162 a URL, the YoutubeDL object handles it to the first InfoExtractor it
163 finds that reports being able to handle it. The InfoExtractor extracts
164 all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
166 Downloader to download the video.
167
168 YoutubeDL objects accept a lot of parameters. In order not to saturate
169 the object constructor with arguments, it receives a dictionary of
170 options instead. These options are available through the params
171 attribute for the InfoExtractors to use. The YoutubeDL also
172 registers itself as the downloader in charge for the InfoExtractors
173 that are added to it, so this is a "mutual registration".
174
175 Available options:
176
177 username: Username for authentication purposes.
178 password: Password for authentication purposes.
179 videopassword: Password for accessing a video.
180 ap_mso: Adobe Pass multiple-system operator identifier.
181 ap_username: Multiple-system operator account username.
182 ap_password: Multiple-system operator account password.
183 usenetrc: Use netrc for authentication instead.
184 verbose: Print additional info to stdout.
185 quiet: Do not print messages to stdout.
186 no_warnings: Do not print out anything for warnings.
187 forceprint: A list of templates to force print
188 forceurl: Force printing final URL. (Deprecated)
189 forcetitle: Force printing title. (Deprecated)
190 forceid: Force printing ID. (Deprecated)
191 forcethumbnail: Force printing thumbnail URL. (Deprecated)
192 forcedescription: Force printing description. (Deprecated)
193 forcefilename: Force printing final filename. (Deprecated)
194 forceduration: Force printing duration. (Deprecated)
195 forcejson: Force printing info_dict as JSON.
196 dump_single_json: Force printing the info_dict of the whole playlist
197 (or video) as a single JSON line.
198 force_write_download_archive: Force writing download archive regardless
199 of 'skip_download' or 'simulate'.
200 simulate: Do not download the video files.
201 format: Video format code. see "FORMAT SELECTION" for more details.
202 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
204 extracting metadata even if the video is not actually
205 available for download (experimental)
206 format_sort: How to sort the video formats. see "Sorting Formats"
207 for more details.
208 format_sort_force: Force the given format_sort. see "Sorting Formats"
209 for more details.
210 allow_multiple_video_streams: Allow multiple video streams to be merged
211 into a single file
212 allow_multiple_audio_streams: Allow multiple audio streams to be merged
213 into a single file
214 check_formats Whether to test if the formats are downloadable.
215 Can be True (check all), False (check none)
216 or None (check only if requested by extractor)
217 paths: Dictionary of output paths. The allowed keys are 'home'
218 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
219 outtmpl: Dictionary of templates for output names. Allowed keys
220 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       A string is also accepted for backward compatibility
222 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
223 restrictfilenames: Do not allow "&" and spaces in file names
224 trim_file_name: Limit length of filename (extension excluded)
225 windowsfilenames: Force the filenames to be windows compatible
226 ignoreerrors: Do not stop on download errors
227 (Default True when running yt-dlp,
228 but False when directly accessing YoutubeDL class)
229 skip_playlist_after_errors: Number of allowed failures until the rest of
230 the playlist is skipped
231 force_generic_extractor: Force downloader to use the generic extractor
232 overwrites: Overwrite all video and metadata files if True,
233 overwrite only non-video files if None
234 and don't overwrite any file if False
235 playliststart: Playlist item to start at.
236 playlistend: Playlist item to end at.
237 playlist_items: Specific indices of playlist to download.
238 playlistreverse: Download playlist items in reverse order.
239 playlistrandom: Download playlist items in random order.
240 matchtitle: Download only matching titles.
241 rejecttitle: Reject downloads for matching titles.
242 logger: Log messages to a logging.Logger instance.
243 logtostderr: Log messages to stderr instead of stdout.
244 writedescription: Write the video description to a .description file
245 writeinfojson: Write the video description to a .info.json file
246 clean_infojson: Remove private fields from the infojson
247 writecomments: Extract video comments. This will not be written to disk
248 unless writeinfojson is also given
249 writeannotations: Write the video annotations to a .annotations.xml file
250 writethumbnail: Write the thumbnail image to a file
251 allow_playlist_files: Whether to write playlists' description, infojson etc
252 also to disk when using the 'write*' options
253 write_all_thumbnails: Write all thumbnail formats to files
254 writelink: Write an internet shortcut file, depending on the
255 current platform (.url/.webloc/.desktop)
256 writeurllink: Write a Windows internet shortcut file (.url)
257 writewebloclink: Write a macOS internet shortcut file (.webloc)
258 writedesktoplink: Write a Linux internet shortcut file (.desktop)
259 writesubtitles: Write the video subtitles to a file
260 writeautomaticsub: Write the automatically generated subtitles to a file
261 allsubtitles: Deprecated - Use subtitleslangs = ['all']
262 Downloads all the subtitles of the video
263 (requires writesubtitles or writeautomaticsub)
264 listsubtitles: Lists all available subtitles for the video
265 subtitlesformat: The format code for subtitles
266 subtitleslangs: List of languages of the subtitles to download (can be regex).
267 The list may contain "all" to refer to all the available
268 subtitles. The language can be prefixed with a "-" to
269 exclude it from the requested languages. Eg: ['all', '-live_chat']
270 keepvideo: Keep the video file after post-processing
271 daterange: A DateRange object, download only if the upload_date is in the range.
272 skip_download: Skip the actual download of the video file
273 cachedir: Location of the cache files in the filesystem.
274 False to disable filesystem cache.
275 noplaylist: Download single video instead of a playlist if in doubt.
276 age_limit: An integer representing the user's age in years.
277 Unsuitable videos for the given age are skipped.
278 min_views: An integer representing the minimum view count the video
279 must have in order to not be skipped.
280 Videos without view count information are always
281 downloaded. None for no limit.
282 max_views: An integer representing the maximum view count.
283 Videos that are more popular than that are not
284 downloaded.
285 Videos without view count information are always
286 downloaded. None for no limit.
287 download_archive: File name of a file where all downloads are recorded.
288 Videos already present in the file are not downloaded
289 again.
290 break_on_existing: Stop the download process after attempting to download a
291 file that is in the archive.
292 break_on_reject: Stop the download process when encountering a video that
293 has been filtered out.
294 cookiefile: File name where cookies should be read from and dumped to
295 cookiesfrombrowser: A tuple containing the name of the browser and the profile
296 name/path from where cookies are loaded.
                       Eg: ('chrome', ) or ('vivaldi', 'default')
298 nocheckcertificate:Do not verify SSL certificates
299 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
300 At the moment, this is only supported by YouTube.
301 proxy: URL of the proxy server to use
302 geo_verification_proxy: URL of the proxy to use for IP address verification
303 on geo-restricted sites.
304 socket_timeout: Time to wait for unresponsive hosts, in seconds
305 bidi_workaround: Work around buggy terminals without bidirectional text
                       support, using fribidi
307 debug_printtraffic:Print out sent and received HTTP traffic
308 include_ads: Download ads as well
309 default_search: Prepend this string if an input url is not valid.
310 'auto' for elaborate guessing
311 encoding: Use this encoding instead of the system-specified.
312 extract_flat: Do not resolve URLs, return the immediate result.
313 Pass in 'in_playlist' to only show this behavior for
314 playlist items.
315 postprocessors: A list of dictionaries, each with an entry
316 * key: The name of the postprocessor. See
317 yt_dlp/postprocessor/__init__.py for a list.
318 * when: When to run the postprocessor. Can be one of
319 pre_process|before_dl|post_process|after_move.
320 Assumed to be 'post_process' if not given
321 post_hooks: A list of functions that get called as the final step
322 for each video file, after all postprocessors have been
323 called. The filename will be passed as the only argument.
324 progress_hooks: A list of functions that get called on download
325 progress, with a dictionary with the entries
326 * status: One of "downloading", "error", or "finished".
327 Check this first and ignore unknown values.
328 * info_dict: The extracted info_dict
329
330 If status is one of "downloading", or "finished", the
331 following properties may also be present:
332 * filename: The final filename (always present)
333 * tmpfilename: The filename we're currently writing to
334 * downloaded_bytes: Bytes on disk
335 * total_bytes: Size of the whole file, None if unknown
336 * total_bytes_estimate: Guess of the eventual file size,
337 None if unavailable.
338 * elapsed: The number of seconds since download started.
339 * eta: The estimated time in seconds, None if unknown
340 * speed: The download speed in bytes/second, None if
341 unknown
342 * fragment_index: The counter of the currently
343 downloaded video fragment.
344 * fragment_count: The number of fragments (= individual
345 files that will be merged)
346
347 Progress hooks are guaranteed to be called at least once
348 (with status "finished") if the download is successful.
349 merge_output_format: Extension to use when merging formats.
350 final_ext: Expected final extension; used to detect when the file was
351 already downloaded and converted. "merge_output_format" is
352 replaced by this extension when given
353 fixup: Automatically correct known faults of the file.
354 One of:
355 - "never": do nothing
356 - "warn": only emit a warning
357 - "detect_or_warn": check whether we can do anything
358 about it, warn otherwise (default)
359 source_address: Client-side IP address to bind to.
360 call_home: Boolean, true iff we are allowed to contact the
361 yt-dlp servers for debugging. (BROKEN)
362 sleep_interval_requests: Number of seconds to sleep between requests
363 during extraction
364 sleep_interval: Number of seconds to sleep before each download when
365 used alone or a lower bound of a range for randomized
366 sleep before each download (minimum possible number
367 of seconds to sleep) when used along with
368 max_sleep_interval.
369 max_sleep_interval:Upper bound of a range for randomized sleep before each
370 download (maximum possible number of seconds to sleep).
371 Must only be used along with sleep_interval.
372 Actual sleep time will be a random float from range
373 [sleep_interval; max_sleep_interval].
374 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
375 listformats: Print an overview of available video formats and exit.
376 list_thumbnails: Print a table of all thumbnails and exit.
377 match_filter: A function that gets called with the info_dict of
378 every video.
379 If it returns a message, the video is ignored.
380 If it returns None, the video is downloaded.
381 match_filter_func in utils.py is one example for this.
382 no_color: Do not emit color codes in output.
383 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
384 HTTP header
385 geo_bypass_country:
386 Two-letter ISO 3166-2 country code that will be used for
387 explicit geographic restriction bypassing via faking
388 X-Forwarded-For HTTP header
389 geo_bypass_ip_block:
390 IP range in CIDR notation that will be used similarly to
391 geo_bypass_country
392
393 The following options determine which downloader is picked:
394 external_downloader: A dictionary of protocol keys and the executable of the
395 external downloader to use for it. The allowed protocols
396 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
397 Set the value to 'native' to use the native downloader
398 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
399 or {'m3u8': 'ffmpeg'} instead.
400 Use the native HLS downloader instead of ffmpeg/avconv
401 if True, otherwise use ffmpeg/avconv if False, otherwise
402 use downloader suggested by extractor if None.
403 compat_opts: Compatibility options. See "Differences in default behavior".
404 The following options do not work when used through the API:
405 filename, abort-on-error, multistreams, no-live-chat,
406 no-clean-infojson, no-playlist-metafiles.
407 Refer __init__.py for their implementation
408
409 The following parameters are not used by YoutubeDL itself, they are used by
410 the downloader (see yt_dlp/downloader/common.py):
411 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
412 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
413 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
414
415 The following options are used by the post processors:
416 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
417 otherwise prefer ffmpeg. (avconv support is deprecated)
418 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
419 to the binary or its containing directory.
420 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
421 and a list of additional command-line arguments for the
422 postprocessor/executable. The dict can also have "PP+EXE" keys
423 which are used when the given exe is used by the given PP.
424 Use 'default' as the name for arguments to passed to all PP
425
426 The following options are used by the extractors:
427 extractor_retries: Number of times to retry for known errors
428 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
429 hls_split_discontinuity: Split HLS playlists to different formats at
430 discontinuities such as ad breaks (default: False)
431 extractor_args: A dictionary of arguments to be passed to the extractors.
432 See "EXTRACTOR ARGUMENTS" for details.
433 Eg: {'youtube': {'skip': ['dash', 'hls']}}
434 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
435 If True (default), DASH manifests and related
436 data will be downloaded and processed by extractor.
437 You can reduce network I/O by disabling it if you don't
438 care about DASH. (only for youtube)
439 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
440 If True (default), HLS manifests and related
441 data will be downloaded and processed by extractor.
442 You can reduce network I/O by disabling it if you don't
443 care about HLS. (only for youtube)
444 """
445
    # Info-dict fields that carry numeric values. (The consumers live outside
    # this chunk; presumably used by the output-template formatter to decide
    # which fields may take numeric format conversions — TODO confirm.)
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults. NOTE: the mutable ones ([], set(), {}) would be
    # shared across instances, but __init__ shadows params, _ies, _pps,
    # _printed_messages, _first_webpage_request, _download_retcode,
    # _num_downloads and _screen_file with per-instance values.
    # _playlist_level and _playlist_urls are NOT re-assigned in __init__.
    params = None
    _ies = []
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
467
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dictionary of options (see the class docstring for keys)
        auto_init: whether to print the debug header and register the default
                   info extractors immediately
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # The boolean indexes the pair: False -> stdout, True -> stderr
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        # User-supplied params override the defaults below
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        # Warn (once per param) about deprecated options; returns whether
        # the deprecated param was actually set.
        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('warnings', []):
            self.report_warning(msg)

        # final_ext (set by --remux-video/--recode-video) takes precedence
        # over an explicit --merge-output-format
        if self.params.get('final_ext'):
            if self.params.get('merge_output_format'):
                self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
            self.params['merge_output_format'] = self.params['final_ext']

        # overwrites=None means "default behavior"; drop it so lookups fall
        # through to the default
        if 'overwrites' in self.params and self.params['overwrites'] is None:
            del self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Pipe all output through an external bidi renderer
                # (bidiv, falling back to fribidi) via a pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        """Preload the archive, if any is specified"""
        # Reads the download archive into self.archive (one stripped line per
        # entry). Returns False when no archive is configured or the file
        # does not exist yet; missing files are not an error.
        def preload_download_archive(fn):
            if fn is None:
                return False
            self.write_debug('Loading archive file %r\n' % fn)
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors; 'when'
        # selects the pipeline stage (see _pps keys)
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
601
602 def warn_if_short_id(self, argv):
603 # short YouTube ID starting with dash?
604 idxs = [
605 i for i, a in enumerate(argv)
606 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
607 if idxs:
608 correct_argv = (
609 ['yt-dlp']
610 + [a for i, a in enumerate(argv) if i not in idxs]
611 + ['--'] + [argv[i] for i in idxs]
612 )
613 self.report_warning(
614 'Long argument string detected. '
615 'Use -- to separate parameters and URLs, like this:\n%s\n' %
616 args_to_str(correct_argv))
617
618 def add_info_extractor(self, ie):
619 """Add an InfoExtractor object to the end of the list."""
620 self._ies.append(ie)
621 if not isinstance(ie, type):
622 self._ies_instances[ie.ie_key()] = ie
623 ie.set_downloader(self)
624
625 def get_info_extractor(self, ie_key):
626 """
627 Get an instance of an IE with name ie_key, it will try to get one from
628 the _ies list, if there's no instance it will create a new one and add
629 it to the extractor list.
630 """
631 ie = self._ies_instances.get(ie_key)
632 if ie is None:
633 ie = get_info_extractor(ie_key)()
634 self.add_info_extractor(ie)
635 return ie
636
637 def add_default_info_extractors(self):
638 """
639 Add the InfoExtractors returned by gen_extractors to the end of the list
640 """
641 for ie in gen_extractor_classes():
642 self.add_info_extractor(ie)
643
644 def add_post_processor(self, pp, when='post_process'):
645 """Add a PostProcessor object to the end of the chain."""
646 self._pps[when].append(pp)
647 pp.set_downloader(self)
648
649 def add_post_hook(self, ph):
650 """Add the post hook"""
651 self._post_hooks.append(ph)
652
653 def add_progress_hook(self, ph):
654 """Add the progress hook (currently only for the file downloader)"""
655 self._progress_hooks.append(ph)
656
657 def _bidi_workaround(self, message):
658 if not hasattr(self, '_output_channel'):
659 return message
660
661 assert hasattr(self, '_output_process')
662 assert isinstance(message, compat_str)
663 line_count = message.count('\n') + 1
664 self._output_process.stdin.write((message + '\n').encode('utf-8'))
665 self._output_process.stdin.flush()
666 res = ''.join(self._output_channel.readline().decode('utf-8')
667 for _ in range(line_count))
668 return res[:-len('\n')]
669
670 def _write_string(self, message, out=None, only_once=False):
671 if only_once:
672 if message in self._printed_messages:
673 return
674 self._printed_messages.add(message)
675 write_string(message, out=out, encoding=self.params.get('encoding'))
676
677 def to_stdout(self, message, skip_eol=False, quiet=False):
678 """Print message to stdout"""
679 if self.params.get('logger'):
680 self.params['logger'].debug(message)
681 elif not quiet or self.params.get('verbose'):
682 self._write_string(
683 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
684 self._err_file if quiet else self._screen_file)
685
686 def to_stderr(self, message, only_once=False):
687 """Print message to stderr"""
688 assert isinstance(message, compat_str)
689 if self.params.get('logger'):
690 self.params['logger'].error(message)
691 else:
692 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
693
694 def to_console_title(self, message):
695 if not self.params.get('consoletitle', False):
696 return
697 if compat_os_name == 'nt':
698 if ctypes.windll.kernel32.GetConsoleWindow():
699 # c_wchar_p() might not be necessary if `message` is
700 # already of type unicode()
701 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
702 elif 'TERM' in os.environ:
703 self._write_string('\033]0;%s\007' % message, self._screen_file)
704
705 def save_console_title(self):
706 if not self.params.get('consoletitle', False):
707 return
708 if self.params.get('simulate', False):
709 return
710 if compat_os_name != 'nt' and 'TERM' in os.environ:
711 # Save the title on stack
712 self._write_string('\033[22;0t', self._screen_file)
713
714 def restore_console_title(self):
715 if not self.params.get('consoletitle', False):
716 return
717 if self.params.get('simulate', False):
718 return
719 if compat_os_name != 'nt' and 'TERM' in os.environ:
720 # Restore the title from stack
721 self._write_string('\033[23;0t', self._screen_file)
722
723 def __enter__(self):
724 self.save_console_title()
725 return self
726
    def __exit__(self, *args):
        # Restore the console title saved by __enter__
        self.restore_console_title()

        # Persist cookies to disk when a cookie file was configured;
        # ignore_discard/ignore_expires keep session and expired cookies too
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)
732
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Exceptions that carry their own exc_info (e.g. wrapped
                    # causes) contribute their traceback first
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info so DownloadError
            # reports the original cause rather than the wrapper
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record the failure and continue
        self._download_retcode = 1
763
764 def to_screen(self, message, skip_eol=False):
765 """Print message to stdout if not in quiet mode"""
766 self.to_stdout(
767 message, skip_eol, quiet=self.params.get('quiet', False))
768
769 def report_warning(self, message, only_once=False):
770 '''
771 Print the message to stderr, it will be prefixed with 'WARNING:'
772 If stderr is a tty file the 'WARNING:' will be colored
773 '''
774 if self.params.get('logger') is not None:
775 self.params['logger'].warning(message)
776 else:
777 if self.params.get('no_warnings'):
778 return
779 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
780 _msg_header = '\033[0;33mWARNING:\033[0m'
781 else:
782 _msg_header = 'WARNING:'
783 warning_message = '%s %s' % (_msg_header, message)
784 self.to_stderr(warning_message, only_once)
785
786 def report_error(self, message, tb=None):
787 '''
788 Do the same as trouble, but prefixes the message with 'ERROR:', colored
789 in red if stderr is a tty file.
790 '''
791 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
792 _msg_header = '\033[0;31mERROR:\033[0m'
793 else:
794 _msg_header = 'ERROR:'
795 error_message = '%s %s' % (_msg_header, message)
796 self.trouble(error_message, tb)
797
798 def write_debug(self, message, only_once=False):
799 '''Log debug message or Print message to stderr'''
800 if not self.params.get('verbose', False):
801 return
802 message = '[debug] %s' % message
803 if self.params.get('logger'):
804 self.params['logger'].debug(message)
805 else:
806 self.to_stderr(message, only_once)
807
808 def report_file_already_downloaded(self, file_name):
809 """Report file has already been fully downloaded."""
810 try:
811 self.to_screen('[download] %s has already been downloaded' % file_name)
812 except UnicodeEncodeError:
813 self.to_screen('[download] The file has already been downloaded')
814
815 def report_file_delete(self, file_name):
816 """Report that existing file will be deleted."""
817 try:
818 self.to_screen('Deleting existing file %s' % file_name)
819 except UnicodeEncodeError:
820 self.to_screen('Deleting existing file')
821
822 def parse_outtmpl(self):
823 outtmpl_dict = self.params.get('outtmpl', {})
824 if not isinstance(outtmpl_dict, dict):
825 outtmpl_dict = {'default': outtmpl_dict}
826 outtmpl_dict.update({
827 k: v for k, v in DEFAULT_OUTTMPL.items()
828 if not outtmpl_dict.get(k)})
829 for key, val in outtmpl_dict.items():
830 if isinstance(val, bytes):
831 self.report_warning(
832 'Parameter outtmpl is bytes, but should be a unicode string. '
833 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
834 return outtmpl_dict
835
836 def get_output_path(self, dir_type='', filename=None):
837 paths = self.params.get('paths', {})
838 assert isinstance(paths, dict)
839 path = os.path.join(
840 expand_path(paths.get('home', '').strip()),
841 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
842 filename or '')
843
844 # Temporary fix for #4787
845 # 'Treat' all problem characters by passing filename through preferredencoding
846 # to workaround encoding issues with subprocess on python2 @ Windows
847 if sys.version_info < (3, 0) and sys.platform == 'win32':
848 path = encodeFilename(path, True).decode(preferredencoding())
849 return sanitize_path(path, force=self.params.get('windowsfilenames'))
850
851 @staticmethod
852 def _outtmpl_expandpath(outtmpl):
853 # expand_path translates '%%' into '%' and '$$' into '$'
854 # correspondingly that is not what we want since we need to keep
855 # '%%' intact for template dict substitution step. Working around
856 # with boundary-alike separator hack.
857 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
858 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
859
860 # outtmpl should be expand_path'ed before template dict substitution
861 # because meta fields may contain env variables we don't want to
862 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
863 # title "Hello $PATH", we don't want `$PATH` to be expanded.
864 return expand_path(outtmpl).replace(sep, '')
865
866 @staticmethod
867 def escape_outtmpl(outtmpl):
868 ''' Escape any remaining strings like %s, %abc% etc. '''
869 return re.sub(
870 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
871 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
872 outtmpl)
873
874 @classmethod
875 def validate_outtmpl(cls, outtmpl):
876 ''' @return None or Exception object '''
877 outtmpl = re.sub(
878 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'),
879 lambda mobj: f'{mobj.group(0)[:-1]}s',
880 cls._outtmpl_expandpath(outtmpl))
881 try:
882 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
883 return None
884 except ValueError as err:
885 return err
886
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
        # Work on a shallow copy so the caller's dict is not mutated
        info_dict = dict(info_dict)
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['epoch'] = int(time.time())
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Maps the mangled keys produced by create_key to their final values;
        # returned to the caller for the actual % substitution
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the decimal part below is unescaped, so it
        # matches any character, not just a decimal point — likely meant r'\.'
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        # Dotted-path lookup into info_dict (e.g. 'formats.0.url')
        get_key = lambda k: traverse_obj(
            info_dict, k.split('.'), is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Resolve one parsed field expression (groupdict of INTERNAL_FORMAT_RE)
            # Object traversal
            value = get_key(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Alternate between consuming an operator and an operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Operand was not numeric: treat it as another field name
                        offset = float_or_none(get_key(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'])

            return value

        def create_key(outer_mobj):
            # Callback for EXTERNAL_FORMAT_RE.sub: evaluates the field and
            # rewrites the template to reference a mangled key in TMPL_DICT
            if not outer_mobj.group('has_key'):
                return f'%{outer_mobj.group(0)}'

            prefix = outer_mobj.group('prefix')
            key = outer_mobj.group('key')
            original_fmt = fmt = outer_mobj.group('format')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            if mobj is None:
                value, default, mobj = None, na, {'fields': ''}
            else:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else na
                value = get_value(mobj)

            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':
                value, fmt = ', '.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':
                value, fmt = json.dumps(value), str_fmt
            elif fmt[-1] == 'q':
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'c':
                value = str(value)
                # NOTE(review): str(value) can never be None ('None' at worst),
                # so this branch looks unreachable; the None check probably
                # belongs before the str() conversion — confirm intent
                if value is None:
                    value, fmt = default, 's'
                else:
                    value = value[0]
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(mobj['fields'].split('.')[-1], value)

            # Mangle the key ('%' -> '%\0') so escape_outtmpl leaves it intact
            key = '%s\0%s' % (key.replace('%', '%\0'), original_fmt)
            TMPL_DICT[key] = value
            return f'{prefix}%({key}){fmt}'

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1017
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        """Expand the output template of the given type with info_dict.

        Returns the expanded filename, or None if the template was invalid
        (the error is reported, not raised).
        """
        try:
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            # Fall back to the 'default' template when the requested type has none
            outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
            outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
            outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
            filename = outtmpl % template_dict

            # Some template types imply a fixed extension (e.g. description/infojson)
            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                # Truncate the stem but keep (sub-)extension(s) intact
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
1047
1048 def prepare_filename(self, info_dict, dir_type='', warn=False):
1049 """Generate the output filename."""
1050
1051 filename = self._prepare_filename(info_dict, dir_type or 'default')
1052
1053 if warn:
1054 if not self.params.get('paths'):
1055 pass
1056 elif filename == '-':
1057 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1058 elif os.path.isabs(filename):
1059 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1060 self.__prepare_filename_warned = True
1061 if filename == '-' or not filename:
1062 return filename
1063
1064 return self.get_output_path(dir_type, filename)
1065
1066 def _match_entry(self, info_dict, incomplete=False, silent=False):
1067 """ Returns None if the file should be downloaded """
1068
1069 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1070
1071 def check_filter():
1072 if 'title' in info_dict:
1073 # This can happen when we're just evaluating the playlist
1074 title = info_dict['title']
1075 matchtitle = self.params.get('matchtitle', False)
1076 if matchtitle:
1077 if not re.search(matchtitle, title, re.IGNORECASE):
1078 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1079 rejecttitle = self.params.get('rejecttitle', False)
1080 if rejecttitle:
1081 if re.search(rejecttitle, title, re.IGNORECASE):
1082 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1083 date = info_dict.get('upload_date')
1084 if date is not None:
1085 dateRange = self.params.get('daterange', DateRange())
1086 if date not in dateRange:
1087 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1088 view_count = info_dict.get('view_count')
1089 if view_count is not None:
1090 min_views = self.params.get('min_views')
1091 if min_views is not None and view_count < min_views:
1092 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1093 max_views = self.params.get('max_views')
1094 if max_views is not None and view_count > max_views:
1095 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1096 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1097 return 'Skipping "%s" because it is age restricted' % video_title
1098
1099 if not incomplete:
1100 match_filter = self.params.get('match_filter')
1101 if match_filter is not None:
1102 ret = match_filter(info_dict)
1103 if ret is not None:
1104 return ret
1105 return None
1106
1107 if self.in_download_archive(info_dict):
1108 reason = '%s has already been recorded in the archive' % video_title
1109 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1110 else:
1111 reason = check_filter()
1112 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1113 if reason is not None:
1114 if not silent:
1115 self.to_screen('[download] ' + reason)
1116 if self.params.get(break_opt, False):
1117 raise break_err()
1118 return reason
1119
1120 @staticmethod
1121 def add_extra_info(info_dict, extra_info):
1122 '''Set the keys from extra_info in info dict if they are missing'''
1123 for key, value in extra_info.items():
1124 info_dict.setdefault(key, value)
1125
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            # Try to obtain the video id cheaply (without network access) so
            # the download archive can be consulted before full extraction
            try:
                temp_id = str_or_none(
                    ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                    else ie._match_id(url))
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                # break skips the for-else below and implicitly returns None
                break
            return self.__extract_info(url, ie, download, extra_info, process)
        else:
            # for-else: no extractor accepted the URL
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1174
    def __handle_extraction_exceptions(func, handle_all_errors=True):
        # Decorator (note: `func` is the decorated method, there is no `self`
        # at this level) that converts extraction errors into reported errors,
        # honours --ignore-errors, and retries when throttling is detected
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload:
                self.to_stderr('\r')
                self.report_warning('The download speed is below throttle limit. Re-extracting data')
                # Retry by re-invoking the wrapper; note that recursion depth
                # grows with each consecutive throttled attempt
                return wrapper(self, *args, **kwargs)
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
                # Control-flow signals, not errors — always propagate
                raise
            except Exception as e:
                if handle_all_errors and self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
1200
    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        # Run the chosen extractor on `url` and optionally resolve the result
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            # Preserve the URL the user originally supplied across redirects
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result
1219
1220 def add_default_extra_info(self, ie_result, ie, url):
1221 if url is not None:
1222 self.add_extra_info(ie_result, {
1223 'webpage_url': url,
1224 'original_url': url,
1225 'webpage_url_basename': url_basename(url),
1226 })
1227 if ie is not None:
1228 self.add_extra_info(ie_result, {
1229 'extractor': ie.IE_NAME,
1230 'extractor_key': ie.ie_key(),
1231 })
1232
1233 def process_ie_result(self, ie_result, download=True, extra_info={}):
1234 """
1235 Take the result of the ie(may be modified) and resolve all unresolved
1236 references (URLs, playlist items).
1237
1238 It will also download the videos if 'download'.
1239 Returns the resolved ie_result.
1240 """
1241 result_type = ie_result.get('_type', 'video')
1242
1243 if result_type in ('url', 'url_transparent'):
1244 ie_result['url'] = sanitize_url(ie_result['url'])
1245 if ie_result.get('original_url'):
1246 extra_info.setdefault('original_url', ie_result['original_url'])
1247
1248 extract_flat = self.params.get('extract_flat', False)
1249 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1250 or extract_flat is True):
1251 info_copy = ie_result.copy()
1252 self.add_extra_info(info_copy, extra_info)
1253 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1254 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1255 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1256 return ie_result
1257
1258 if result_type == 'video':
1259 self.add_extra_info(ie_result, extra_info)
1260 ie_result = self.process_video_result(ie_result, download=download)
1261 additional_urls = (ie_result or {}).get('additional_urls')
1262 if additional_urls:
1263 # TODO: Improve MetadataFromFieldPP to allow setting a list
1264 if isinstance(additional_urls, compat_str):
1265 additional_urls = [additional_urls]
1266 self.to_screen(
1267 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1268 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1269 ie_result['additional_entries'] = [
1270 self.extract_info(
1271 url, download, extra_info,
1272 force_generic_extractor=self.params.get('force_generic_extractor'))
1273 for url in additional_urls
1274 ]
1275 return ie_result
1276 elif result_type == 'url':
1277 # We have to add extra_info to the results because it may be
1278 # contained in a playlist
1279 return self.extract_info(
1280 ie_result['url'], download,
1281 ie_key=ie_result.get('ie_key'),
1282 extra_info=extra_info)
1283 elif result_type == 'url_transparent':
1284 # Use the information from the embedding page
1285 info = self.extract_info(
1286 ie_result['url'], ie_key=ie_result.get('ie_key'),
1287 extra_info=extra_info, download=False, process=False)
1288
1289 # extract_info may return None when ignoreerrors is enabled and
1290 # extraction failed with an error, don't crash and return early
1291 # in this case
1292 if not info:
1293 return info
1294
1295 force_properties = dict(
1296 (k, v) for k, v in ie_result.items() if v is not None)
1297 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1298 if f in force_properties:
1299 del force_properties[f]
1300 new_result = info.copy()
1301 new_result.update(force_properties)
1302
1303 # Extracted info may not be a video result (i.e.
1304 # info.get('_type', 'video') != video) but rather an url or
1305 # url_transparent. In such cases outer metadata (from ie_result)
1306 # should be propagated to inner one (info). For this to happen
1307 # _type of info should be overridden with url_transparent. This
1308 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1309 if new_result.get('_type') == 'url':
1310 new_result['_type'] = 'url_transparent'
1311
1312 return self.process_ie_result(
1313 new_result, download=download, extra_info=extra_info)
1314 elif result_type in ('playlist', 'multi_video'):
1315 # Protect from infinite recursion due to recursively nested playlists
1316 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1317 webpage_url = ie_result['webpage_url']
1318 if webpage_url in self._playlist_urls:
1319 self.to_screen(
1320 '[download] Skipping already downloaded playlist: %s'
1321 % ie_result.get('title') or ie_result.get('id'))
1322 return
1323
1324 self._playlist_level += 1
1325 self._playlist_urls.add(webpage_url)
1326 self._sanitize_thumbnails(ie_result)
1327 try:
1328 return self.__process_playlist(ie_result, download)
1329 finally:
1330 self._playlist_level -= 1
1331 if not self._playlist_level:
1332 self._playlist_urls.clear()
1333 elif result_type == 'compat_list':
1334 self.report_warning(
1335 'Extractor %s returned a compat_list result. '
1336 'It needs to be updated.' % ie_result.get('extractor'))
1337
1338 def _fixup(r):
1339 self.add_extra_info(
1340 r,
1341 {
1342 'extractor': ie_result['extractor'],
1343 'webpage_url': ie_result['webpage_url'],
1344 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1345 'extractor_key': ie_result['extractor_key'],
1346 }
1347 )
1348 return r
1349 ie_result['entries'] = [
1350 self.process_ie_result(_fixup(r), download, extra_info)
1351 for r in ie_result['entries']
1352 ]
1353 return ie_result
1354 else:
1355 raise Exception('Invalid result type: %s' % result_type)
1356
1357 def _ensure_dir_exists(self, path):
1358 return make_dir(path, self.report_error)
1359
1360 def __process_playlist(self, ie_result, download):
1361 # We process each entry in the playlist
1362 playlist = ie_result.get('title') or ie_result.get('id')
1363 self.to_screen('[download] Downloading playlist: %s' % playlist)
1364
1365 if 'entries' not in ie_result:
1366 raise EntryNotInPlaylist()
1367 incomplete_entries = bool(ie_result.get('requested_entries'))
1368 if incomplete_entries:
1369 def fill_missing_entries(entries, indexes):
1370 ret = [None] * max(*indexes)
1371 for i, entry in zip(indexes, entries):
1372 ret[i - 1] = entry
1373 return ret
1374 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1375
1376 playlist_results = []
1377
1378 playliststart = self.params.get('playliststart', 1)
1379 playlistend = self.params.get('playlistend')
1380 # For backwards compatibility, interpret -1 as whole list
1381 if playlistend == -1:
1382 playlistend = None
1383
1384 playlistitems_str = self.params.get('playlist_items')
1385 playlistitems = None
1386 if playlistitems_str is not None:
1387 def iter_playlistitems(format):
1388 for string_segment in format.split(','):
1389 if '-' in string_segment:
1390 start, end = string_segment.split('-')
1391 for item in range(int(start), int(end) + 1):
1392 yield int(item)
1393 else:
1394 yield int(string_segment)
1395 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1396
1397 ie_entries = ie_result['entries']
1398 msg = (
1399 'Downloading %d videos' if not isinstance(ie_entries, list)
1400 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1401 if not isinstance(ie_entries, (list, PagedList)):
1402 ie_entries = LazyList(ie_entries)
1403
1404 def get_entry(i):
1405 return YoutubeDL.__handle_extraction_exceptions(
1406 lambda self, i: ie_entries[i - 1],
1407 False
1408 )(self, i)
1409
1410 entries = []
1411 for i in playlistitems or itertools.count(playliststart):
1412 if playlistitems is None and playlistend is not None and playlistend < i:
1413 break
1414 entry = None
1415 try:
1416 entry = get_entry(i)
1417 if entry is None:
1418 raise EntryNotInPlaylist()
1419 except (IndexError, EntryNotInPlaylist):
1420 if incomplete_entries:
1421 raise EntryNotInPlaylist()
1422 elif not playlistitems:
1423 break
1424 entries.append(entry)
1425 try:
1426 if entry is not None:
1427 self._match_entry(entry, incomplete=True, silent=True)
1428 except (ExistingVideoReached, RejectedVideoReached):
1429 break
1430 ie_result['entries'] = entries
1431
1432 # Save playlist_index before re-ordering
1433 entries = [
1434 ((playlistitems[i - 1] if playlistitems else i), entry)
1435 for i, entry in enumerate(entries, 1)
1436 if entry is not None]
1437 n_entries = len(entries)
1438
1439 if not playlistitems and (playliststart or playlistend):
1440 playlistitems = list(range(playliststart, playliststart + n_entries))
1441 ie_result['requested_entries'] = playlistitems
1442
1443 if self.params.get('allow_playlist_files', True):
1444 ie_copy = {
1445 'playlist': playlist,
1446 'playlist_id': ie_result.get('id'),
1447 'playlist_title': ie_result.get('title'),
1448 'playlist_uploader': ie_result.get('uploader'),
1449 'playlist_uploader_id': ie_result.get('uploader_id'),
1450 'playlist_index': 0,
1451 }
1452 ie_copy.update(dict(ie_result))
1453
1454 if self.params.get('writeinfojson', False):
1455 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1456 if not self._ensure_dir_exists(encodeFilename(infofn)):
1457 return
1458 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1459 self.to_screen('[info] Playlist metadata is already present')
1460 else:
1461 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1462 try:
1463 write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1464 except (OSError, IOError):
1465 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1466
1467 # TODO: This should be passed to ThumbnailsConvertor if necessary
1468 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1469
1470 if self.params.get('writedescription', False):
1471 descfn = self.prepare_filename(ie_copy, 'pl_description')
1472 if not self._ensure_dir_exists(encodeFilename(descfn)):
1473 return
1474 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1475 self.to_screen('[info] Playlist description is already present')
1476 elif ie_result.get('description') is None:
1477 self.report_warning('There\'s no playlist description to write.')
1478 else:
1479 try:
1480 self.to_screen('[info] Writing playlist description to: ' + descfn)
1481 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1482 descfile.write(ie_result['description'])
1483 except (OSError, IOError):
1484 self.report_error('Cannot write playlist description file ' + descfn)
1485 return
1486
1487 if self.params.get('playlistreverse', False):
1488 entries = entries[::-1]
1489 if self.params.get('playlistrandom', False):
1490 random.shuffle(entries)
1491
1492 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1493
1494 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1495 failures = 0
1496 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1497 for i, entry_tuple in enumerate(entries, 1):
1498 playlist_index, entry = entry_tuple
1499 if 'playlist_index' in self.params.get('compat_options', []):
1500 playlist_index = playlistitems[i - 1] if playlistitems else i
1501 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1502 # This __x_forwarded_for_ip thing is a bit ugly but requires
1503 # minimal changes
1504 if x_forwarded_for:
1505 entry['__x_forwarded_for_ip'] = x_forwarded_for
1506 extra = {
1507 'n_entries': n_entries,
1508 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1509 'playlist_index': playlist_index,
1510 'playlist_autonumber': i,
1511 'playlist': playlist,
1512 'playlist_id': ie_result.get('id'),
1513 'playlist_title': ie_result.get('title'),
1514 'playlist_uploader': ie_result.get('uploader'),
1515 'playlist_uploader_id': ie_result.get('uploader_id'),
1516 'extractor': ie_result['extractor'],
1517 'webpage_url': ie_result['webpage_url'],
1518 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1519 'extractor_key': ie_result['extractor_key'],
1520 }
1521
1522 if self._match_entry(entry, incomplete=True) is not None:
1523 continue
1524
1525 entry_result = self.__process_iterable_entry(entry, download, extra)
1526 if not entry_result:
1527 failures += 1
1528 if failures >= max_failures:
1529 self.report_error(
1530 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1531 break
1532 # TODO: skip failed (empty) entries?
1533 playlist_results.append(entry_result)
1534 ie_result['entries'] = playlist_results
1535 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1536 return ie_result
1537
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        # Process a single playlist entry; the decorator reports extraction
        # errors so one failing entry does not abort the whole playlist
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1542
    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

        # Numeric comparisons, e.g. 'height<=720' or 'filesize>100M'
        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.fullmatch(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                # Not a plain integer: try parsing as a size ('100M', '2KiB', ...)
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            # String comparisons, e.g. 'vcodec^=avc1' (optionally negated with '!')
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)\s*
                (?P<key>[a-zA-Z0-9._-]+)\s*
                (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
                (?P<value>[a-zA-Z0-9._-]+)\s*
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.fullmatch(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            # Closes over m/op/comparison_value bound above
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                # Formats missing the attribute pass only when '?' was given
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter
1603
1604 def _default_format_spec(self, info_dict, download=True):
1605
1606 def can_merge():
1607 merger = FFmpegMergerPP(self)
1608 return merger.available and merger.can_merge()
1609
1610 prefer_best = (
1611 not self.params.get('simulate', False)
1612 and download
1613 and (
1614 not can_merge()
1615 or info_dict.get('is_live', False)
1616 or self.outtmpl_dict['default'] == '-'))
1617 compat = (
1618 prefer_best
1619 or self.params.get('allow_multiple_audio_streams', False)
1620 or 'format-spec' in self.params.get('compat_opts', []))
1621
1622 return (
1623 'best/bestvideo+bestaudio' if prefer_best
1624 else 'bestvideo*+bestaudio/best' if not compat
1625 else 'bestvideo+bestaudio/best')
1626
    def build_format_selector(self, format_spec):
        """Compile a format selection string (e.g. 'bestvideo*+bestaudio/best')
        into a selector function.

        The returned function takes a ctx dict with keys 'formats' and
        'incomplete_formats' and yields the chosen format dicts. The spec is
        tokenized with the stdlib tokenizer, parsed into a tree of
        FormatSelector nodes, then compiled into nested generator functions.
        Raises SyntaxError (via syntax_error) on a malformed spec.
        """
        def syntax_error(note, start):
            # Build a caret-annotated error pointing at column start[1] of the spec
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node type tags for the FormatSelector parse tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        check_formats = self.params.get('check_formats')

        def _parse_filter(tokens):
            # Consume tokens up to the closing ']' and join them into a single
            # filter expression string (parsed later by _build_format_filter)
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        # Flush any pending joined NAME token before the bracket
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Accumulate adjacent names/numbers/unused ops into one NAME
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of a comma-separated selector list.
            # The inside_* flags control which operators terminate this level.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter with no preceding selector applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            # Combine two selected formats (each possibly itself a merge)
            # into a single dict carrying 'requested_formats'.
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                # Keep at most one stream per kind unless multiple streams
                # of that kind were explicitly allowed.
                # NOTE(review): popping from formats_info while enumerating it
                # skips the entry after each removal — confirm whether more
                # than one disallowed entry can occur here in practice.
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            # Choose the merged container: user preference, else the single
            # video's (or audio-only's) extension, else mkv as the safe default
            output_ext = self.params.get('merge_output_format')
            if not output_ext:
                if the_only_video:
                    output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']
                else:
                    output_ext = 'mkv'

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                'ext': output_ext,
            }

            # Only propagate stream-specific fields when they are unambiguous
            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                })

            return new_dict

        def _check_formats(formats):
            # When --check-formats is set, test-download each format to a
            # temp file and yield only the ones that actually work.
            if not check_formats:
                yield from formats
                return
            for f in formats:
                self.to_screen('[info] Testing format %s' % f['format_id'])
                temp_file = tempfile.NamedTemporaryFile(
                    suffix='.tmp', delete=False,
                    dir=self.get_output_path('temp') or None)
                temp_file.close()
                try:
                    success, _ = self.dl(temp_file.name, f, test=True)
                except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                    success = False
                finally:
                    if os.path.exists(temp_file.name):
                        try:
                            os.remove(temp_file.name)
                        except OSError:
                            self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
                if success:
                    yield f
                else:
                    self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

        def _build_selector_function(selector):
            # Compile a parse-tree node into a generator function over ctx
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Each side gets its own deep copy so one side's
                    # consumption cannot affect the other
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        # Fold all working formats into one merged format,
                        # merging from worst to best
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # Matches b/w[orst|est][video|audio][*][.N] selectors
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Plain token: treat known extensions as ext filters,
                        # everything else as a literal format_id
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            # Apply the node's [filter] expressions on a copied ctx so the
            # original format list is left intact for sibling selectors
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token stream with one-token push-back (restore_last_token),
            # needed by the recursive parser to un-consume terminators
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1956
1957 def _calc_headers(self, info_dict):
1958 res = std_headers.copy()
1959
1960 add_headers = info_dict.get('http_headers')
1961 if add_headers:
1962 res.update(add_headers)
1963
1964 cookies = self._calc_cookies(info_dict)
1965 if cookies:
1966 res['Cookie'] = cookies
1967
1968 if 'X-Forwarded-For' not in res:
1969 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1970 if x_forwarded_for_ip:
1971 res['X-Forwarded-For'] = x_forwarded_for_ip
1972
1973 return res
1974
1975 def _calc_cookies(self, info_dict):
1976 pr = sanitized_Request(info_dict['url'])
1977 self.cookiejar.add_cookie_header(pr)
1978 return pr.get_header('Cookie')
1979
1980 def _sanitize_thumbnails(self, info_dict):
1981 thumbnails = info_dict.get('thumbnails')
1982 if thumbnails is None:
1983 thumbnail = info_dict.get('thumbnail')
1984 if thumbnail:
1985 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1986 if thumbnails:
1987 thumbnails.sort(key=lambda t: (
1988 t.get('preference') if t.get('preference') is not None else -1,
1989 t.get('width') if t.get('width') is not None else -1,
1990 t.get('height') if t.get('height') is not None else -1,
1991 t.get('id') if t.get('id') is not None else '',
1992 t.get('url')))
1993
1994 def thumbnail_tester():
1995 if self.params.get('check_formats'):
1996 test_all = True
1997 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
1998 else:
1999 test_all = False
2000 to_screen = self.write_debug
2001
2002 def test_thumbnail(t):
2003 if not test_all and not t.get('_test_url'):
2004 return True
2005 to_screen('Testing thumbnail %s' % t['id'])
2006 try:
2007 self.urlopen(HEADRequest(t['url']))
2008 except network_exceptions as err:
2009 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2010 t['id'], t['url'], error_to_compat_str(err)))
2011 return False
2012 return True
2013
2014 return test_thumbnail
2015
2016 for i, t in enumerate(thumbnails):
2017 if t.get('id') is None:
2018 t['id'] = '%d' % i
2019 if t.get('width') and t.get('height'):
2020 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2021 t['url'] = sanitize_url(t['url'])
2022
2023 if self.params.get('check_formats') is not False:
2024 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2025 else:
2026 info_dict['thumbnails'] = thumbnails
2027
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single-video extractor result and act on it.

        Validates mandatory fields, coerces field types, normalizes
        thumbnails/subtitles/dates/live status, sanitizes the format list,
        then runs format selection and passes each chosen format to
        process_info (when download is True). Returns the mutated info_dict.
        Raises ExtractorError on missing id/title or when no format matches
        (unless ignore_no_formats_error is set).
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string value to str, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field to int (or None), warning once each
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Keep 'thumbnail' and 'thumbnails' consistent: the last entry of the
        # sorted thumbnails list is used as the single representative URL
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive YYYYMMDD date fields from their timestamp counterparts
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile 'live_status' with the boolean is_live/was_live fields,
        # deriving whichever side is missing from the other
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                    break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and fill in missing extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('No video formats found!')
            else:
                self.report_warning('No video formats found!')

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats and formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # Listing modes print the requested information and stop here
        list_only = self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')
        if list_only:
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            if self.params.get('list_thumbnails'):
                self.list_thumbnails(info_dict)
            if self.params.get('listformats'):
                if not info_dict.get('formats'):
                    raise ExtractorError('No video formats found', expected=True)
                self.list_formats(info_dict)
            if self.params.get('listsubtitles'):
                if 'automatic_captions' in info_dict:
                    self.list_subtitles(
                        info_dict['id'], automatic_captions, 'automatic captions')
                self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True)
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2283
2284 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2285 """Select the requested subtitles and their format"""
2286 available_subs = {}
2287 if normal_subtitles and self.params.get('writesubtitles'):
2288 available_subs.update(normal_subtitles)
2289 if automatic_captions and self.params.get('writeautomaticsub'):
2290 for lang, cap_info in automatic_captions.items():
2291 if lang not in available_subs:
2292 available_subs[lang] = cap_info
2293
2294 if (not self.params.get('writesubtitles') and not
2295 self.params.get('writeautomaticsub') or not
2296 available_subs):
2297 return None
2298
2299 all_sub_langs = available_subs.keys()
2300 if self.params.get('allsubtitles', False):
2301 requested_langs = all_sub_langs
2302 elif self.params.get('subtitleslangs', False):
2303 requested_langs = set()
2304 for lang in self.params.get('subtitleslangs'):
2305 if lang == 'all':
2306 requested_langs.update(all_sub_langs)
2307 continue
2308 discard = lang[0] == '-'
2309 if discard:
2310 lang = lang[1:]
2311 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2312 if discard:
2313 for lang in current_langs:
2314 requested_langs.discard(lang)
2315 else:
2316 requested_langs.update(current_langs)
2317 elif 'en' in available_subs:
2318 requested_langs = ['en']
2319 else:
2320 requested_langs = [list(all_sub_langs)[0]]
2321 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2322
2323 formats_query = self.params.get('subtitlesformat', 'best')
2324 formats_preference = formats_query.split('/') if formats_query else []
2325 subs = {}
2326 for lang in requested_langs:
2327 formats = available_subs.get(lang)
2328 if formats is None:
2329 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2330 continue
2331 for ext in formats_preference:
2332 if ext == 'best':
2333 f = formats[-1]
2334 break
2335 matches = list(filter(lambda f: f['ext'] == ext, formats))
2336 if matches:
2337 f = matches[-1]
2338 break
2339 else:
2340 f = formats[-1]
2341 self.report_warning(
2342 'No subtitle format found matching "%s" for language %s, '
2343 'using %s' % (formats_query, lang, f['ext']))
2344 subs[lang] = f
2345 return subs
2346
2347 def __forced_printings(self, info_dict, filename, incomplete):
2348 def print_mandatory(field, actual_field=None):
2349 if actual_field is None:
2350 actual_field = field
2351 if (self.params.get('force%s' % field, False)
2352 and (not incomplete or info_dict.get(actual_field) is not None)):
2353 self.to_stdout(info_dict[actual_field])
2354
2355 def print_optional(field):
2356 if (self.params.get('force%s' % field, False)
2357 and info_dict.get(field) is not None):
2358 self.to_stdout(info_dict[field])
2359
2360 info_dict = info_dict.copy()
2361 if filename is not None:
2362 info_dict['filename'] = filename
2363 if info_dict.get('requested_formats') is not None:
2364 # For RTMP URLs, also include the playpath
2365 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2366 elif 'url' in info_dict:
2367 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2368
2369 for tmpl in self.params.get('forceprint', []):
2370 if re.match(r'\w+$', tmpl):
2371 tmpl = '%({})s'.format(tmpl)
2372 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2373 self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)
2374
2375 print_mandatory('title')
2376 print_mandatory('id')
2377 print_mandatory('url', 'urls')
2378 print_optional('thumbnail')
2379 print_optional('description')
2380 print_optional('filename')
2381 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2382 self.to_stdout(formatSeconds(info_dict['duration']))
2383 print_mandatory('format')
2384
2385 if self.params.get('forcejson', False):
2386 self.post_extract(info_dict)
2387 self.to_stdout(json.dumps(info_dict, default=repr))
2388
2389 def dl(self, name, info, subtitle=False, test=False):
2390
2391 if test:
2392 verbose = self.params.get('verbose')
2393 params = {
2394 'test': True,
2395 'quiet': not verbose,
2396 'verbose': verbose,
2397 'noprogress': not verbose,
2398 'nopart': True,
2399 'skip_unavailable_fragments': False,
2400 'keep_fragments': False,
2401 'overwrites': True,
2402 '_no_ytdl_file': True,
2403 }
2404 else:
2405 params = self.params
2406 fd = get_suitable_downloader(info, params)(self, params)
2407 if not test:
2408 for ph in self._progress_hooks:
2409 fd.add_progress_hook(ph)
2410 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2411 self.write_debug('Invoking downloader on "%s"' % urls)
2412 new_info = dict(info)
2413 if new_info.get('http_headers') is None:
2414 new_info['http_headers'] = self._calc_headers(new_info)
2415 return fd.download(name, new_info, subtitle)
2416
2417 def process_info(self, info_dict):
2418 """Process a single resolved IE result."""
2419
2420 assert info_dict.get('_type', 'video') == 'video'
2421
2422 info_dict.setdefault('__postprocessors', [])
2423
2424 max_downloads = self.params.get('max_downloads')
2425 if max_downloads is not None:
2426 if self._num_downloads >= int(max_downloads):
2427 raise MaxDownloadsReached()
2428
2429 # TODO: backward compatibility, to be removed
2430 info_dict['fulltitle'] = info_dict['title']
2431
2432 if 'format' not in info_dict and 'ext' in info_dict:
2433 info_dict['format'] = info_dict['ext']
2434
2435 if self._match_entry(info_dict) is not None:
2436 return
2437
2438 self.post_extract(info_dict)
2439 self._num_downloads += 1
2440
2441 # info_dict['_filename'] needs to be set for backward compatibility
2442 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2443 temp_filename = self.prepare_filename(info_dict, 'temp')
2444 files_to_move = {}
2445
2446 # Forced printings
2447 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2448
2449 if self.params.get('simulate', False):
2450 if self.params.get('force_write_download_archive', False):
2451 self.record_download_archive(info_dict)
2452
2453 # Do nothing else if in simulate mode
2454 return
2455
2456 if full_filename is None:
2457 return
2458
2459 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2460 return
2461 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2462 return
2463
2464 if self.params.get('writedescription', False):
2465 descfn = self.prepare_filename(info_dict, 'description')
2466 if not self._ensure_dir_exists(encodeFilename(descfn)):
2467 return
2468 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2469 self.to_screen('[info] Video description is already present')
2470 elif info_dict.get('description') is None:
2471 self.report_warning('There\'s no description to write.')
2472 else:
2473 try:
2474 self.to_screen('[info] Writing video description to: ' + descfn)
2475 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2476 descfile.write(info_dict['description'])
2477 except (OSError, IOError):
2478 self.report_error('Cannot write description file ' + descfn)
2479 return
2480
2481 if self.params.get('writeannotations', False):
2482 annofn = self.prepare_filename(info_dict, 'annotation')
2483 if not self._ensure_dir_exists(encodeFilename(annofn)):
2484 return
2485 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2486 self.to_screen('[info] Video annotations are already present')
2487 elif not info_dict.get('annotations'):
2488 self.report_warning('There are no annotations to write.')
2489 else:
2490 try:
2491 self.to_screen('[info] Writing video annotations to: ' + annofn)
2492 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2493 annofile.write(info_dict['annotations'])
2494 except (KeyError, TypeError):
2495 self.report_warning('There are no annotations to write.')
2496 except (OSError, IOError):
2497 self.report_error('Cannot write annotations file: ' + annofn)
2498 return
2499
2500 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2501 self.params.get('writeautomaticsub')])
2502
2503 if subtitles_are_requested and info_dict.get('requested_subtitles'):
2504 # subtitles download errors are already managed as troubles in relevant IE
2505 # that way it will silently go on when used with unsupporting IE
2506 subtitles = info_dict['requested_subtitles']
2507 # ie = self.get_info_extractor(info_dict['extractor_key'])
2508 for sub_lang, sub_info in subtitles.items():
2509 sub_format = sub_info['ext']
2510 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2511 sub_filename_final = subtitles_filename(
2512 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2513 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2514 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2515 sub_info['filepath'] = sub_filename
2516 files_to_move[sub_filename] = sub_filename_final
2517 else:
2518 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2519 if sub_info.get('data') is not None:
2520 try:
2521 # Use newline='' to prevent conversion of newline characters
2522 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2523 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2524 subfile.write(sub_info['data'])
2525 sub_info['filepath'] = sub_filename
2526 files_to_move[sub_filename] = sub_filename_final
2527 except (OSError, IOError):
2528 self.report_error('Cannot write subtitles file ' + sub_filename)
2529 return
2530 else:
2531 try:
2532 self.dl(sub_filename, sub_info.copy(), subtitle=True)
2533 sub_info['filepath'] = sub_filename
2534 files_to_move[sub_filename] = sub_filename_final
2535 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2536 self.report_warning('Unable to download subtitle for "%s": %s' %
2537 (sub_lang, error_to_compat_str(err)))
2538 continue
2539
2540 if self.params.get('writeinfojson', False):
2541 infofn = self.prepare_filename(info_dict, 'infojson')
2542 if not self._ensure_dir_exists(encodeFilename(infofn)):
2543 return
2544 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2545 self.to_screen('[info] Video metadata is already present')
2546 else:
2547 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2548 try:
2549 write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2550 except (OSError, IOError):
2551 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2552 return
2553 info_dict['__infojson_filename'] = infofn
2554
2555 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2556 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2557 thumb_filename = replace_extension(
2558 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2559 files_to_move[thumb_filename_temp] = thumb_filename
2560
2561 # Write internet shortcut files
2562 url_link = webloc_link = desktop_link = False
2563 if self.params.get('writelink', False):
2564 if sys.platform == "darwin": # macOS.
2565 webloc_link = True
2566 elif sys.platform.startswith("linux"):
2567 desktop_link = True
2568 else: # if sys.platform in ['win32', 'cygwin']:
2569 url_link = True
2570 if self.params.get('writeurllink', False):
2571 url_link = True
2572 if self.params.get('writewebloclink', False):
2573 webloc_link = True
2574 if self.params.get('writedesktoplink', False):
2575 desktop_link = True
2576
2577 if url_link or webloc_link or desktop_link:
2578 if 'webpage_url' not in info_dict:
2579 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2580 return
2581 ascii_url = iri_to_uri(info_dict['webpage_url'])
2582
2583 def _write_link_file(extension, template, newline, embed_filename):
2584 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2585 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2586 self.to_screen('[info] Internet shortcut is already present')
2587 else:
2588 try:
2589 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2590 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2591 template_vars = {'url': ascii_url}
2592 if embed_filename:
2593 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2594 linkfile.write(template % template_vars)
2595 except (OSError, IOError):
2596 self.report_error('Cannot write internet shortcut ' + linkfn)
2597 return False
2598 return True
2599
2600 if url_link:
2601 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2602 return
2603 if webloc_link:
2604 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2605 return
2606 if desktop_link:
2607 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2608 return
2609
2610 try:
2611 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2612 except PostProcessingError as err:
2613 self.report_error('Preprocessing: %s' % str(err))
2614 return
2615
2616 must_record_download_archive = False
2617 if self.params.get('skip_download', False):
2618 info_dict['filepath'] = temp_filename
2619 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2620 info_dict['__files_to_move'] = files_to_move
2621 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2622 else:
2623 # Download
2624 try:
2625
2626 def existing_file(*filepaths):
2627 ext = info_dict.get('ext')
2628 final_ext = self.params.get('final_ext', ext)
2629 existing_files = []
2630 for file in orderedSet(filepaths):
2631 if final_ext != ext:
2632 converted = replace_extension(file, final_ext, ext)
2633 if os.path.exists(encodeFilename(converted)):
2634 existing_files.append(converted)
2635 if os.path.exists(encodeFilename(file)):
2636 existing_files.append(file)
2637
2638 if not existing_files or self.params.get('overwrites', False):
2639 for file in orderedSet(existing_files):
2640 self.report_file_delete(file)
2641 os.remove(encodeFilename(file))
2642 return None
2643
2644 self.report_file_already_downloaded(existing_files[0])
2645 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2646 return existing_files[0]
2647
2648 success = True
2649 if info_dict.get('requested_formats') is not None:
2650
2651 def compatible_formats(formats):
2652 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2653 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2654 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2655 if len(video_formats) > 2 or len(audio_formats) > 2:
2656 return False
2657
2658 # Check extension
2659 exts = set(format.get('ext') for format in formats)
2660 COMPATIBLE_EXTS = (
2661 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2662 set(('webm',)),
2663 )
2664 for ext_sets in COMPATIBLE_EXTS:
2665 if ext_sets.issuperset(exts):
2666 return True
2667 # TODO: Check acodec/vcodec
2668 return False
2669
2670 requested_formats = info_dict['requested_formats']
2671 old_ext = info_dict['ext']
2672 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2673 info_dict['ext'] = 'mkv'
2674 self.report_warning(
2675 'Requested formats are incompatible for merge and will be merged into mkv.')
2676
2677 def correct_ext(filename):
2678 filename_real_ext = os.path.splitext(filename)[1][1:]
2679 filename_wo_ext = (
2680 os.path.splitext(filename)[0]
2681 if filename_real_ext == old_ext
2682 else filename)
2683 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2684
2685 # Ensure filename always has a correct extension for successful merge
2686 full_filename = correct_ext(full_filename)
2687 temp_filename = correct_ext(temp_filename)
2688 dl_filename = existing_file(full_filename, temp_filename)
2689 info_dict['__real_download'] = False
2690
2691 _protocols = set(determine_protocol(f) for f in requested_formats)
2692 if len(_protocols) == 1:
2693 info_dict['protocol'] = _protocols.pop()
2694 directly_mergable = (
2695 'no-direct-merge' not in self.params.get('compat_opts', [])
2696 and info_dict.get('protocol') is not None # All requested formats have same protocol
2697 and not self.params.get('allow_unplayable_formats')
2698 and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2699 if directly_mergable:
2700 info_dict['url'] = requested_formats[0]['url']
2701 # Treat it as a single download
2702 dl_filename = existing_file(full_filename, temp_filename)
2703 if dl_filename is None:
2704 success, real_download = self.dl(temp_filename, info_dict)
2705 info_dict['__real_download'] = real_download
2706 else:
2707 downloaded = []
2708 merger = FFmpegMergerPP(self)
2709 if self.params.get('allow_unplayable_formats'):
2710 self.report_warning(
2711 'You have requested merging of multiple formats '
2712 'while also allowing unplayable formats to be downloaded. '
2713 'The formats won\'t be merged to prevent data corruption.')
2714 elif not merger.available:
2715 self.report_warning(
2716 'You have requested merging of multiple formats but ffmpeg is not installed. '
2717 'The formats won\'t be merged.')
2718
2719 if dl_filename is None:
2720 for f in requested_formats:
2721 new_info = dict(info_dict)
2722 del new_info['requested_formats']
2723 new_info.update(f)
2724 fname = prepend_extension(
2725 self.prepare_filename(new_info, 'temp'),
2726 'f%s' % f['format_id'], new_info['ext'])
2727 if not self._ensure_dir_exists(fname):
2728 return
2729 downloaded.append(fname)
2730 partial_success, real_download = self.dl(fname, new_info)
2731 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2732 success = success and partial_success
2733 if merger.available and not self.params.get('allow_unplayable_formats'):
2734 info_dict['__postprocessors'].append(merger)
2735 info_dict['__files_to_merge'] = downloaded
2736 # Even if there were no downloads, it is being merged only now
2737 info_dict['__real_download'] = True
2738 else:
2739 for file in downloaded:
2740 files_to_move[file] = None
2741 else:
2742 # Just a single file
2743 dl_filename = existing_file(full_filename, temp_filename)
2744 if dl_filename is None:
2745 success, real_download = self.dl(temp_filename, info_dict)
2746 info_dict['__real_download'] = real_download
2747
2748 dl_filename = dl_filename or temp_filename
2749 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2750
2751 except network_exceptions as err:
2752 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2753 return
2754 except (OSError, IOError) as err:
2755 raise UnavailableVideoError(err)
2756 except (ContentTooShortError, ) as err:
2757 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2758 return
2759
2760 if success and full_filename != '-':
2761
2762 def fixup():
2763 do_fixup = True
2764 fixup_policy = self.params.get('fixup')
2765 vid = info_dict['id']
2766
2767 if fixup_policy in ('ignore', 'never'):
2768 return
2769 elif fixup_policy == 'warn':
2770 do_fixup = False
2771 elif fixup_policy != 'force':
2772 assert fixup_policy in ('detect_or_warn', None)
2773 if not info_dict.get('__real_download'):
2774 do_fixup = False
2775
2776 def ffmpeg_fixup(cndn, msg, cls):
2777 if not cndn:
2778 return
2779 if not do_fixup:
2780 self.report_warning(f'{vid}: {msg}')
2781 return
2782 pp = cls(self)
2783 if pp.available:
2784 info_dict['__postprocessors'].append(pp)
2785 else:
2786 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2787
2788 stretched_ratio = info_dict.get('stretched_ratio')
2789 ffmpeg_fixup(
2790 stretched_ratio not in (1, None),
2791 f'Non-uniform pixel ratio {stretched_ratio}',
2792 FFmpegFixupStretchedPP)
2793
2794 ffmpeg_fixup(
2795 (info_dict.get('requested_formats') is None
2796 and info_dict.get('container') == 'm4a_dash'
2797 and info_dict.get('ext') == 'm4a'),
2798 'writing DASH m4a. Only some players support this container',
2799 FFmpegFixupM4aPP)
2800
2801 downloader = (get_suitable_downloader(info_dict, self.params).__name__
2802 if 'protocol' in info_dict else None)
2803 ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2804 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2805 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2806
2807 fixup()
2808 try:
2809 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2810 except PostProcessingError as err:
2811 self.report_error('Postprocessing: %s' % str(err))
2812 return
2813 try:
2814 for ph in self._post_hooks:
2815 ph(info_dict['filepath'])
2816 except Exception as err:
2817 self.report_error('post hooks: %s' % str(err))
2818 return
2819 must_record_download_archive = True
2820
2821 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2822 self.record_download_archive(info_dict)
2823 max_downloads = self.params.get('max_downloads')
2824 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2825 raise MaxDownloadsReached()
2826
2827 def download(self, url_list):
2828 """Download a given list of URLs."""
2829 outtmpl = self.outtmpl_dict['default']
2830 if (len(url_list) > 1
2831 and outtmpl != '-'
2832 and '%' not in outtmpl
2833 and self.params.get('max_downloads') != 1):
2834 raise SameFileError(outtmpl)
2835
2836 for url in url_list:
2837 try:
2838 # It also downloads the videos
2839 res = self.extract_info(
2840 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2841 except UnavailableVideoError:
2842 self.report_error('unable to download video')
2843 except MaxDownloadsReached:
2844 self.to_screen('[info] Maximum number of downloaded files reached')
2845 raise
2846 except ExistingVideoReached:
2847 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2848 raise
2849 except RejectedVideoReached:
2850 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2851 raise
2852 else:
2853 if self.params.get('dump_single_json', False):
2854 self.post_extract(res)
2855 self.to_stdout(json.dumps(res, default=repr))
2856
2857 return self._download_retcode
2858
2859 def download_with_info_file(self, info_filename):
2860 with contextlib.closing(fileinput.FileInput(
2861 [info_filename], mode='r',
2862 openhook=fileinput.hook_encoded('utf-8'))) as f:
2863 # FileInput doesn't have a read method, we can't call json.load
2864 info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2865 try:
2866 self.process_ie_result(info, download=True)
2867 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2868 webpage_url = info.get('webpage_url')
2869 if webpage_url is not None:
2870 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2871 return self.download([webpage_url])
2872 else:
2873 raise
2874 return self._download_retcode
2875
2876 @staticmethod
2877 def filter_requested_info(info_dict, actually_filter=True):
2878 remove_keys = ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict
2879 keep_keys = ['_type'], # Always keep this to facilitate load-info-json
2880 if actually_filter:
2881 remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2882 empty_values = (None, {}, [], set(), tuple())
2883 reject = lambda k, v: k not in keep_keys and (
2884 k.startswith('_') or k in remove_keys or v in empty_values)
2885 else:
2886 info_dict['epoch'] = int(time.time())
2887 reject = lambda k, v: k in remove_keys
2888 filter_fn = lambda obj: (
2889 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2890 else obj if not isinstance(obj, dict)
2891 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2892 return filter_fn(info_dict)
2893
2894 def run_pp(self, pp, infodict):
2895 files_to_delete = []
2896 if '__files_to_move' not in infodict:
2897 infodict['__files_to_move'] = {}
2898 files_to_delete, infodict = pp.run(infodict)
2899 if not files_to_delete:
2900 return infodict
2901
2902 if self.params.get('keepvideo', False):
2903 for f in files_to_delete:
2904 infodict['__files_to_move'].setdefault(f, '')
2905 else:
2906 for old_filename in set(files_to_delete):
2907 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2908 try:
2909 os.remove(encodeFilename(old_filename))
2910 except (IOError, OSError):
2911 self.report_warning('Unable to remove downloaded original file')
2912 if old_filename in infodict['__files_to_move']:
2913 del infodict['__files_to_move'][old_filename]
2914 return infodict
2915
2916 @staticmethod
2917 def post_extract(info_dict):
2918 def actual_post_extract(info_dict):
2919 if info_dict.get('_type') in ('playlist', 'multi_video'):
2920 for video_dict in info_dict.get('entries', {}):
2921 actual_post_extract(video_dict or {})
2922 return
2923
2924 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2925 extra = post_extractor().items()
2926 info_dict.update(extra)
2927 info_dict.pop('__post_extractor', None)
2928
2929 original_infodict = info_dict.get('__original_infodict') or {}
2930 original_infodict.update(extra)
2931 original_infodict.pop('__post_extractor', None)
2932
2933 actual_post_extract(info_dict or {})
2934
2935 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2936 info = dict(ie_info)
2937 info['__files_to_move'] = files_to_move or {}
2938 for pp in self._pps[key]:
2939 info = self.run_pp(pp, info)
2940 return info, info.pop('__files_to_move', None)
2941
2942 def post_process(self, filename, ie_info, files_to_move=None):
2943 """Run all the postprocessors on the given file."""
2944 info = dict(ie_info)
2945 info['filepath'] = filename
2946 info['__files_to_move'] = files_to_move or {}
2947
2948 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2949 info = self.run_pp(pp, info)
2950 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2951 del info['__files_to_move']
2952 for pp in self._pps['after_move']:
2953 info = self.run_pp(pp, info)
2954 return info
2955
2956 def _make_archive_id(self, info_dict):
2957 video_id = info_dict.get('id')
2958 if not video_id:
2959 return
2960 # Future-proof against any change in case
2961 # and backwards compatibility with prior versions
2962 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2963 if extractor is None:
2964 url = str_or_none(info_dict.get('url'))
2965 if not url:
2966 return
2967 # Try to find matching extractor for the URL and take its ie_key
2968 for ie in self._ies:
2969 if ie.suitable(url):
2970 extractor = ie.ie_key()
2971 break
2972 else:
2973 return
2974 return '%s %s' % (extractor.lower(), video_id)
2975
2976 def in_download_archive(self, info_dict):
2977 fn = self.params.get('download_archive')
2978 if fn is None:
2979 return False
2980
2981 vid_id = self._make_archive_id(info_dict)
2982 if not vid_id:
2983 return False # Incomplete video information
2984
2985 return vid_id in self.archive
2986
2987 def record_download_archive(self, info_dict):
2988 fn = self.params.get('download_archive')
2989 if fn is None:
2990 return
2991 vid_id = self._make_archive_id(info_dict)
2992 assert vid_id
2993 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2994 archive_file.write(vid_id + '\n')
2995 self.archive.add(vid_id)
2996
2997 @staticmethod
2998 def format_resolution(format, default='unknown'):
2999 if format.get('vcodec') == 'none':
3000 if format.get('acodec') == 'none':
3001 return 'images'
3002 return 'audio only'
3003 if format.get('resolution') is not None:
3004 return format['resolution']
3005 if format.get('width') and format.get('height'):
3006 res = '%dx%d' % (format['width'], format['height'])
3007 elif format.get('height'):
3008 res = '%sp' % format['height']
3009 elif format.get('width'):
3010 res = '%dx?' % format['width']
3011 else:
3012 res = default
3013 return res
3014
    def _format_note(self, fdict):
        """Build the free-form 'note' column for one format dict.

        Used by the legacy (non-table) format listing in list_formats();
        concatenates language, bitrates, codecs, fps and filesize into a
        single comma/space-separated line.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # '@' joins the codec to the bitrate appended below
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Video bitrate known but codec unknown
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            # Audio bitrate known but codec unknown
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
3070
    def list_formats(self, info_dict):
        """Print the available formats for a video as a table on stdout."""
        formats = info_dict.get('formats', [info_dict])
        # The new table layout is disabled by the 'list-formats' compat option
        # or by setting listformats_table to False
        new_format = (
            'list-formats' not in self.params.get('compat_opts', [])
            and self.params.get('listformats_table', True) is not False)
        if new_format:
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    format_field(f, 'fps', '%d'),
                    '|',
                    format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                    format_field(f, 'tbr', '%4dk'),
                    shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                    '|',
                    format_field(f, 'vcodec', default='unknown').replace('none', ''),
                    format_field(f, 'vbr', '%4dk'),
                    format_field(f, 'acodec', default='unknown').replace('none', ''),
                    format_field(f, 'abr', '%3dk'),
                    format_field(f, 'asr', '%5dHz'),
                    ', '.join(filter(None, (
                        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
                        format_field(f, 'language', '[%s]'),
                        format_field(f, 'format_note'),
                        format_field(f, 'container', ignore=(None, f.get('ext'))),
                    ))),
                # Formats with very low preference are hidden from the listing
                ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                           '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
        else:
            # Legacy four-column layout; the note column comes from _format_note()
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        self.to_screen(
            '[info] Available formats for %s:' % info_dict['id'])
        self.to_stdout(render_table(
            header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3117
3118 def list_thumbnails(self, info_dict):
3119 thumbnails = list(info_dict.get('thumbnails'))
3120 if not thumbnails:
3121 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3122 return
3123
3124 self.to_screen(
3125 '[info] Thumbnails for %s:' % info_dict['id'])
3126 self.to_stdout(render_table(
3127 ['ID', 'width', 'height', 'URL'],
3128 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3129
3130 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3131 if not subtitles:
3132 self.to_screen('%s has no %s' % (video_id, name))
3133 return
3134 self.to_screen(
3135 'Available %s for %s:' % (name, video_id))
3136
3137 def _row(lang, formats):
3138 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3139 if len(set(names)) == 1:
3140 names = [] if names[0] == 'unknown' else names[:1]
3141 return [lang, ', '.join(names), ', '.join(exts)]
3142
3143 self.to_stdout(render_table(
3144 ['Language', 'Name', 'Formats'],
3145 [_row(lang, formats) for lang, formats in subtitles.items()],
3146 hideEmpty=True))
3147
3148 def urlopen(self, req):
3149 """ Start an HTTP download """
3150 if isinstance(req, compat_basestring):
3151 req = sanitized_Request(req)
3152 return self._opener.open(req, timeout=self._socket_timeout)
3153
    def print_debug_header(self):
        """Write version/environment debug information to the screen (verbose mode only)."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        # How this copy is being run: frozen exe, zip bundle or plain source
        source = (
            '(exe)' if hasattr(sys, 'frozen')
            else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
            else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
            else '')
        self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        if self.params.get('compat_opts'):
            self._write_string(
                '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
        # Best-effort: report the git commit when running from a checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            try:
                # Python 2 only; a no-op failure elsewhere
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the PyPy version triple when available
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy configuration from the opener's handlers
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # NOTE(review): network call to yt-dl.org; only with --call-home
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            return
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3243
    def _setup_opener(self):
        """Build the urllib opener (self._opener) with cookie, proxy and security handlers."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty --proxy disables proxying entirely
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
3291
3292 def encode(self, s):
3293 if isinstance(s, bytes):
3294 return s # Already encoded
3295
3296 try:
3297 return s.encode(self.get_encoding())
3298 except UnicodeEncodeError as err:
3299 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3300 raise
3301
3302 def get_encoding(self):
3303 encoding = self.params.get('encoding')
3304 if encoding is None:
3305 encoding = preferredencoding()
3306 return encoding
3307
3308 def _write_thumbnails(self, info_dict, filename): # return the extensions
3309 write_all = self.params.get('write_all_thumbnails', False)
3310 thumbnails = []
3311 if write_all or self.params.get('writethumbnail', False):
3312 thumbnails = info_dict.get('thumbnails') or []
3313 multiple = write_all and len(thumbnails) > 1
3314
3315 ret = []
3316 for t in thumbnails[::-1]:
3317 thumb_ext = determine_ext(t['url'], 'jpg')
3318 suffix = '%s.' % t['id'] if multiple else ''
3319 thumb_display_id = '%s ' % t['id'] if multiple else ''
3320 thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3321
3322 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3323 ret.append(suffix + thumb_ext)
3324 t['filepath'] = thumb_filename
3325 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3326 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3327 else:
3328 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3329 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3330 try:
3331 uf = self.urlopen(t['url'])
3332 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3333 shutil.copyfileobj(uf, thumbf)
3334 ret.append(suffix + thumb_ext)
3335 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3336 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3337 t['filepath'] = thumb_filename
3338 except network_exceptions as err:
3339 self.report_warning('Unable to download thumbnail "%s": %s' %
3340 (t['url'], error_to_compat_str(err)))
3341 if ret and not write_all:
3342 break
3343 return ret