]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
17aa6348749240847666412539041f5079ba4e08
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import tempfile
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30 from zipimport import zipimporter
31
32 from .compat import (
33 compat_basestring,
34 compat_get_terminal_size,
35 compat_kwargs,
36 compat_numeric_types,
37 compat_os_name,
38 compat_shlex_quote,
39 compat_str,
40 compat_tokenize_tokenize,
41 compat_urllib_error,
42 compat_urllib_request,
43 compat_urllib_request_DataHandler,
44 )
45 from .cookies import load_cookies
46 from .utils import (
47 age_restricted,
48 args_to_str,
49 ContentTooShortError,
50 date_from_str,
51 DateRange,
52 DEFAULT_OUTTMPL,
53 determine_ext,
54 determine_protocol,
55 DOT_DESKTOP_LINK_TEMPLATE,
56 DOT_URL_LINK_TEMPLATE,
57 DOT_WEBLOC_LINK_TEMPLATE,
58 DownloadError,
59 encode_compat_str,
60 encodeFilename,
61 EntryNotInPlaylist,
62 error_to_compat_str,
63 ExistingVideoReached,
64 expand_path,
65 ExtractorError,
66 float_or_none,
67 format_bytes,
68 format_field,
69 STR_FORMAT_RE_TMPL,
70 STR_FORMAT_TYPES,
71 formatSeconds,
72 GeoRestrictedError,
73 HEADRequest,
74 int_or_none,
75 iri_to_uri,
76 ISO3166Utils,
77 LazyList,
78 locked_file,
79 make_dir,
80 make_HTTPS_handler,
81 MaxDownloadsReached,
82 network_exceptions,
83 orderedSet,
84 OUTTMPL_TYPES,
85 PagedList,
86 parse_filesize,
87 PerRequestProxyHandler,
88 platform_name,
89 PostProcessingError,
90 preferredencoding,
91 prepend_extension,
92 process_communicate_or_kill,
93 register_socks_protocols,
94 RejectedVideoReached,
95 render_table,
96 replace_extension,
97 SameFileError,
98 sanitize_filename,
99 sanitize_path,
100 sanitize_url,
101 sanitized_Request,
102 std_headers,
103 str_or_none,
104 strftime_or_none,
105 subtitles_filename,
106 ThrottledDownload,
107 to_high_limit_path,
108 traverse_obj,
109 try_get,
110 UnavailableVideoError,
111 url_basename,
112 variadic,
113 version_tuple,
114 write_json_file,
115 write_string,
116 YoutubeDLCookieProcessor,
117 YoutubeDLHandler,
118 YoutubeDLRedirectHandler,
119 )
120 from .cache import Cache
121 from .extractor import (
122 gen_extractor_classes,
123 get_info_extractor,
124 _LAZY_LOADER,
125 _PLUGIN_CLASSES
126 )
127 from .extractor.openload import PhantomJSwrapper
128 from .downloader import (
129 FFmpegFD,
130 get_suitable_downloader,
131 shorten_protocol_name
132 )
133 from .downloader.rtmp import rtmpdump_version
134 from .postprocessor import (
135 get_postprocessor,
136 FFmpegFixupDurationPP,
137 FFmpegFixupM3u8PP,
138 FFmpegFixupM4aPP,
139 FFmpegFixupStretchedPP,
140 FFmpegFixupTimestampPP,
141 FFmpegMergerPP,
142 FFmpegPostProcessor,
143 MoveFilesAfterDownloadPP,
144 )
145 from .version import __version__
146
if compat_os_name == 'nt':
    # ctypes is only needed on Windows, for the Win32 console-title calls
    # in to_console_title() (ctypes.windll.kernel32.SetConsoleTitleW)
    import ctypes
149
150
151 class YoutubeDL(object):
152 """YoutubeDL class.
153
154 YoutubeDL objects are the ones responsible of downloading the
155 actual video file and writing it to disk if the user has requested
156 it, among some other tasks. In most cases there should be one per
157 program. As, given a video URL, the downloader doesn't know how to
158     extract all the needed information (a task that InfoExtractors do), it
159 has to pass the URL to one of them.
160
161 For this, YoutubeDL objects have a method that allows
162 InfoExtractors to be registered in a given order. When it is passed
163 a URL, the YoutubeDL object handles it to the first InfoExtractor it
164 finds that reports being able to handle it. The InfoExtractor extracts
165 all the information about the video or videos the URL refers to, and
166 YoutubeDL process the extracted information, possibly using a File
167 Downloader to download the video.
168
169 YoutubeDL objects accept a lot of parameters. In order not to saturate
170 the object constructor with arguments, it receives a dictionary of
171 options instead. These options are available through the params
172 attribute for the InfoExtractors to use. The YoutubeDL also
173 registers itself as the downloader in charge for the InfoExtractors
174 that are added to it, so this is a "mutual registration".
175
176 Available options:
177
178 username: Username for authentication purposes.
179 password: Password for authentication purposes.
180 videopassword: Password for accessing a video.
181 ap_mso: Adobe Pass multiple-system operator identifier.
182 ap_username: Multiple-system operator account username.
183 ap_password: Multiple-system operator account password.
184 usenetrc: Use netrc for authentication instead.
185 verbose: Print additional info to stdout.
186 quiet: Do not print messages to stdout.
187 no_warnings: Do not print out anything for warnings.
188 forceprint: A list of templates to force print
189 forceurl: Force printing final URL. (Deprecated)
190 forcetitle: Force printing title. (Deprecated)
191 forceid: Force printing ID. (Deprecated)
192 forcethumbnail: Force printing thumbnail URL. (Deprecated)
193 forcedescription: Force printing description. (Deprecated)
194 forcefilename: Force printing final filename. (Deprecated)
195 forceduration: Force printing duration. (Deprecated)
196 forcejson: Force printing info_dict as JSON.
197 dump_single_json: Force printing the info_dict of the whole playlist
198 (or video) as a single JSON line.
199 force_write_download_archive: Force writing download archive regardless
200 of 'skip_download' or 'simulate'.
201 simulate: Do not download the video files. If unset (or None),
202 simulate only if listsubtitles, listformats or list_thumbnails is used
203 format: Video format code. see "FORMAT SELECTION" for more details.
204 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
205     ignore_no_formats_error: Ignore "No video formats" error. Useful for
206 extracting metadata even if the video is not actually
207 available for download (experimental)
208 format_sort: How to sort the video formats. see "Sorting Formats"
209 for more details.
210 format_sort_force: Force the given format_sort. see "Sorting Formats"
211 for more details.
212 allow_multiple_video_streams: Allow multiple video streams to be merged
213 into a single file
214 allow_multiple_audio_streams: Allow multiple audio streams to be merged
215 into a single file
216 check_formats Whether to test if the formats are downloadable.
217 Can be True (check all), False (check none)
218 or None (check only if requested by extractor)
219 paths: Dictionary of output paths. The allowed keys are 'home'
220 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
221 outtmpl: Dictionary of templates for output names. Allowed keys
222 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
223 For compatibility with youtube-dl, a single string can also be used
224 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
225 restrictfilenames: Do not allow "&" and spaces in file names
226 trim_file_name: Limit length of filename (extension excluded)
227 windowsfilenames: Force the filenames to be windows compatible
228 ignoreerrors: Do not stop on download errors
229 (Default True when running yt-dlp,
230 but False when directly accessing YoutubeDL class)
231 skip_playlist_after_errors: Number of allowed failures until the rest of
232 the playlist is skipped
233 force_generic_extractor: Force downloader to use the generic extractor
234 overwrites: Overwrite all video and metadata files if True,
235 overwrite only non-video files if None
236 and don't overwrite any file if False
237 For compatibility with youtube-dl,
238 "nooverwrites" may also be used instead
239 playliststart: Playlist item to start at.
240 playlistend: Playlist item to end at.
241 playlist_items: Specific indices of playlist to download.
242 playlistreverse: Download playlist items in reverse order.
243 playlistrandom: Download playlist items in random order.
244 matchtitle: Download only matching titles.
245 rejecttitle: Reject downloads for matching titles.
246 logger: Log messages to a logging.Logger instance.
247 logtostderr: Log messages to stderr instead of stdout.
248 writedescription: Write the video description to a .description file
249 writeinfojson: Write the video description to a .info.json file
250 clean_infojson: Remove private fields from the infojson
251 getcomments: Extract video comments. This will not be written to disk
252 unless writeinfojson is also given
253 writeannotations: Write the video annotations to a .annotations.xml file
254 writethumbnail: Write the thumbnail image to a file
255 allow_playlist_files: Whether to write playlists' description, infojson etc
256 also to disk when using the 'write*' options
257 write_all_thumbnails: Write all thumbnail formats to files
258 writelink: Write an internet shortcut file, depending on the
259 current platform (.url/.webloc/.desktop)
260 writeurllink: Write a Windows internet shortcut file (.url)
261 writewebloclink: Write a macOS internet shortcut file (.webloc)
262 writedesktoplink: Write a Linux internet shortcut file (.desktop)
263 writesubtitles: Write the video subtitles to a file
264 writeautomaticsub: Write the automatically generated subtitles to a file
265 allsubtitles: Deprecated - Use subtitleslangs = ['all']
266 Downloads all the subtitles of the video
267 (requires writesubtitles or writeautomaticsub)
268 listsubtitles: Lists all available subtitles for the video
269 subtitlesformat: The format code for subtitles
270 subtitleslangs: List of languages of the subtitles to download (can be regex).
271 The list may contain "all" to refer to all the available
272 subtitles. The language can be prefixed with a "-" to
273 exclude it from the requested languages. Eg: ['all', '-live_chat']
274 keepvideo: Keep the video file after post-processing
275 daterange: A DateRange object, download only if the upload_date is in the range.
276 skip_download: Skip the actual download of the video file
277 cachedir: Location of the cache files in the filesystem.
278 False to disable filesystem cache.
279 noplaylist: Download single video instead of a playlist if in doubt.
280 age_limit: An integer representing the user's age in years.
281 Unsuitable videos for the given age are skipped.
282 min_views: An integer representing the minimum view count the video
283 must have in order to not be skipped.
284 Videos without view count information are always
285 downloaded. None for no limit.
286 max_views: An integer representing the maximum view count.
287 Videos that are more popular than that are not
288 downloaded.
289 Videos without view count information are always
290 downloaded. None for no limit.
291 download_archive: File name of a file where all downloads are recorded.
292 Videos already present in the file are not downloaded
293 again.
294 break_on_existing: Stop the download process after attempting to download a
295 file that is in the archive.
296 break_on_reject: Stop the download process when encountering a video that
297 has been filtered out.
298 cookiefile: File name where cookies should be read from and dumped to
299 cookiesfrombrowser: A tuple containing the name of the browser and the profile
300 name/path from where cookies are loaded.
301                        Eg: ('chrome', ) or ('vivaldi', 'default')
302 nocheckcertificate:Do not verify SSL certificates
303 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
304 At the moment, this is only supported by YouTube.
305 proxy: URL of the proxy server to use
306 geo_verification_proxy: URL of the proxy to use for IP address verification
307 on geo-restricted sites.
308 socket_timeout: Time to wait for unresponsive hosts, in seconds
309 bidi_workaround: Work around buggy terminals without bidirectional text
310                        support, using fribidi
311 debug_printtraffic:Print out sent and received HTTP traffic
312 include_ads: Download ads as well
313 default_search: Prepend this string if an input url is not valid.
314 'auto' for elaborate guessing
315 encoding: Use this encoding instead of the system-specified.
316 extract_flat: Do not resolve URLs, return the immediate result.
317 Pass in 'in_playlist' to only show this behavior for
318 playlist items.
319 postprocessors: A list of dictionaries, each with an entry
320 * key: The name of the postprocessor. See
321 yt_dlp/postprocessor/__init__.py for a list.
322 * when: When to run the postprocessor. Can be one of
323 pre_process|before_dl|post_process|after_move.
324 Assumed to be 'post_process' if not given
325 post_hooks: A list of functions that get called as the final step
326 for each video file, after all postprocessors have been
327 called. The filename will be passed as the only argument.
328 progress_hooks: A list of functions that get called on download
329 progress, with a dictionary with the entries
330 * status: One of "downloading", "error", or "finished".
331 Check this first and ignore unknown values.
332 * info_dict: The extracted info_dict
333
334 If status is one of "downloading", or "finished", the
335 following properties may also be present:
336 * filename: The final filename (always present)
337 * tmpfilename: The filename we're currently writing to
338 * downloaded_bytes: Bytes on disk
339 * total_bytes: Size of the whole file, None if unknown
340 * total_bytes_estimate: Guess of the eventual file size,
341 None if unavailable.
342 * elapsed: The number of seconds since download started.
343 * eta: The estimated time in seconds, None if unknown
344 * speed: The download speed in bytes/second, None if
345 unknown
346 * fragment_index: The counter of the currently
347 downloaded video fragment.
348 * fragment_count: The number of fragments (= individual
349 files that will be merged)
350
351 Progress hooks are guaranteed to be called at least once
352 (with status "finished") if the download is successful.
353 merge_output_format: Extension to use when merging formats.
354 final_ext: Expected final extension; used to detect when the file was
355 already downloaded and converted. "merge_output_format" is
356 replaced by this extension when given
357 fixup: Automatically correct known faults of the file.
358 One of:
359 - "never": do nothing
360 - "warn": only emit a warning
361 - "detect_or_warn": check whether we can do anything
362 about it, warn otherwise (default)
363 source_address: Client-side IP address to bind to.
364 call_home: Boolean, true iff we are allowed to contact the
365 yt-dlp servers for debugging. (BROKEN)
366 sleep_interval_requests: Number of seconds to sleep between requests
367 during extraction
368 sleep_interval: Number of seconds to sleep before each download when
369 used alone or a lower bound of a range for randomized
370 sleep before each download (minimum possible number
371 of seconds to sleep) when used along with
372 max_sleep_interval.
373 max_sleep_interval:Upper bound of a range for randomized sleep before each
374 download (maximum possible number of seconds to sleep).
375 Must only be used along with sleep_interval.
376 Actual sleep time will be a random float from range
377 [sleep_interval; max_sleep_interval].
378 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
379 listformats: Print an overview of available video formats and exit.
380 list_thumbnails: Print a table of all thumbnails and exit.
381 match_filter: A function that gets called with the info_dict of
382 every video.
383 If it returns a message, the video is ignored.
384 If it returns None, the video is downloaded.
385 match_filter_func in utils.py is one example for this.
386 no_color: Do not emit color codes in output.
387 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
388 HTTP header
389 geo_bypass_country:
390 Two-letter ISO 3166-2 country code that will be used for
391 explicit geographic restriction bypassing via faking
392 X-Forwarded-For HTTP header
393 geo_bypass_ip_block:
394 IP range in CIDR notation that will be used similarly to
395 geo_bypass_country
396
397 The following options determine which downloader is picked:
398 external_downloader: A dictionary of protocol keys and the executable of the
399 external downloader to use for it. The allowed protocols
400 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
401 Set the value to 'native' to use the native downloader
402 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
403 or {'m3u8': 'ffmpeg'} instead.
404 Use the native HLS downloader instead of ffmpeg/avconv
405 if True, otherwise use ffmpeg/avconv if False, otherwise
406 use downloader suggested by extractor if None.
407 compat_opts: Compatibility options. See "Differences in default behavior".
408 The following options do not work when used through the API:
409 filename, abort-on-error, multistreams, no-live-chat,
410 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
411 Refer __init__.py for their implementation
412
413 The following parameters are not used by YoutubeDL itself, they are used by
414 the downloader (see yt_dlp/downloader/common.py):
415 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
416 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
417 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
418
419 The following options are used by the post processors:
420 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
421 otherwise prefer ffmpeg. (avconv support is deprecated)
422 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
423 to the binary or its containing directory.
424 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
425 and a list of additional command-line arguments for the
426 postprocessor/executable. The dict can also have "PP+EXE" keys
427 which are used when the given exe is used by the given PP.
428 Use 'default' as the name for arguments to passed to all PP
429 For compatibility with youtube-dl, a single list of args
430 can also be used
431
432 The following options are used by the extractors:
433 extractor_retries: Number of times to retry for known errors
434 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
435 hls_split_discontinuity: Split HLS playlists to different formats at
436 discontinuities such as ad breaks (default: False)
437 extractor_args: A dictionary of arguments to be passed to the extractors.
438 See "EXTRACTOR ARGUMENTS" for details.
439 Eg: {'youtube': {'skip': ['dash', 'hls']}}
440 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
441 If True (default), DASH manifests and related
442 data will be downloaded and processed by extractor.
443 You can reduce network I/O by disabling it if you don't
444 care about DASH. (only for youtube)
445 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
446 If True (default), HLS manifests and related
447 data will be downloaded and processed by extractor.
448 You can reduce network I/O by disabling it if you don't
449 care about HLS. (only for youtube)
450 """
451
    # Names of info_dict fields whose values are numeric.
    # NOTE(review): the consumers of this set are outside this chunk —
    # presumably used for numeric formatting/comparison in output templates;
    # confirm against the rest of the file.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults. Most of these are shadowed by per-instance
    # assignments in __init__(); _playlist_level and _playlist_urls are not
    # re-assigned there and so stay class-shared until set elsewhere.
    params = None
    _ies = []
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
473
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    Option dictionary (see the class docstring). It is merged
                   over a small set of built-in defaults. None means no options.
        auto_init: Whether to print the debug header and register the default
                   info extractors immediately.
        """
        if params is None:
            params = {}
        # Per-instance state (shadows the class-level defaults)
        self._ies = []
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when 'logtostderr' is set
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                'You have asked for unplayable formats to be listed/downloaded. '
                'This is a developer option intended for debugging. '
                'If you experience any issues while using this option, DO NOT open a bug report')

        def check_deprecated(param, option, suggestion):
            # Warn once about a deprecated option; returns whether it was set.
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('warnings', []):
            self.report_warning(msg)

        # Keep 'overwrites' and the legacy 'nooverwrites' keys mirrored
        if self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        elif self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if params.get('bidi_workaround', False):
            # Spawn an external bidi filter (bidiv, falling back to fribidi)
            # connected through a pty; consumed by _bidi_workaround()
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv unavailable; try fribidi instead
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        """Preload the archive, if any is specified"""
        def preload_download_archive(fn):
            # Load already-downloaded IDs from the archive file into
            # self.archive; returns whether the file could be read.
            if fn is None:
                return False
            self.write_debug('Loading archive file %r\n' % fn)
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                # A missing archive file is fine; anything else is re-raised
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate the configured postprocessors and hooks
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
615
616 def warn_if_short_id(self, argv):
617 # short YouTube ID starting with dash?
618 idxs = [
619 i for i, a in enumerate(argv)
620 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
621 if idxs:
622 correct_argv = (
623 ['yt-dlp']
624 + [a for i, a in enumerate(argv) if i not in idxs]
625 + ['--'] + [argv[i] for i in idxs]
626 )
627 self.report_warning(
628 'Long argument string detected. '
629 'Use -- to separate parameters and URLs, like this:\n%s\n' %
630 args_to_str(correct_argv))
631
632 def add_info_extractor(self, ie):
633 """Add an InfoExtractor object to the end of the list."""
634 self._ies.append(ie)
635 if not isinstance(ie, type):
636 self._ies_instances[ie.ie_key()] = ie
637 ie.set_downloader(self)
638
639 def get_info_extractor(self, ie_key):
640 """
641 Get an instance of an IE with name ie_key, it will try to get one from
642 the _ies list, if there's no instance it will create a new one and add
643 it to the extractor list.
644 """
645 ie = self._ies_instances.get(ie_key)
646 if ie is None:
647 ie = get_info_extractor(ie_key)()
648 self.add_info_extractor(ie)
649 return ie
650
651 def add_default_info_extractors(self):
652 """
653 Add the InfoExtractors returned by gen_extractors to the end of the list
654 """
655 for ie in gen_extractor_classes():
656 self.add_info_extractor(ie)
657
658 def add_post_processor(self, pp, when='post_process'):
659 """Add a PostProcessor object to the end of the chain."""
660 self._pps[when].append(pp)
661 pp.set_downloader(self)
662
663 def add_post_hook(self, ph):
664 """Add the post hook"""
665 self._post_hooks.append(ph)
666
667 def add_progress_hook(self, ph):
668 """Add the progress hook (currently only for the file downloader)"""
669 self._progress_hooks.append(ph)
670
671 def _bidi_workaround(self, message):
672 if not hasattr(self, '_output_channel'):
673 return message
674
675 assert hasattr(self, '_output_process')
676 assert isinstance(message, compat_str)
677 line_count = message.count('\n') + 1
678 self._output_process.stdin.write((message + '\n').encode('utf-8'))
679 self._output_process.stdin.flush()
680 res = ''.join(self._output_channel.readline().decode('utf-8')
681 for _ in range(line_count))
682 return res[:-len('\n')]
683
684 def _write_string(self, message, out=None, only_once=False):
685 if only_once:
686 if message in self._printed_messages:
687 return
688 self._printed_messages.add(message)
689 write_string(message, out=out, encoding=self.params.get('encoding'))
690
691 def to_stdout(self, message, skip_eol=False, quiet=False):
692 """Print message to stdout"""
693 if self.params.get('logger'):
694 self.params['logger'].debug(message)
695 elif not quiet or self.params.get('verbose'):
696 self._write_string(
697 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
698 self._err_file if quiet else self._screen_file)
699
700 def to_stderr(self, message, only_once=False):
701 """Print message to stderr"""
702 assert isinstance(message, compat_str)
703 if self.params.get('logger'):
704 self.params['logger'].error(message)
705 else:
706 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
707
708 def to_console_title(self, message):
709 if not self.params.get('consoletitle', False):
710 return
711 if compat_os_name == 'nt':
712 if ctypes.windll.kernel32.GetConsoleWindow():
713 # c_wchar_p() might not be necessary if `message` is
714 # already of type unicode()
715 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
716 elif 'TERM' in os.environ:
717 self._write_string('\033]0;%s\007' % message, self._screen_file)
718
719 def save_console_title(self):
720 if not self.params.get('consoletitle', False):
721 return
722 if self.params.get('simulate'):
723 return
724 if compat_os_name != 'nt' and 'TERM' in os.environ:
725 # Save the title on stack
726 self._write_string('\033[22;0t', self._screen_file)
727
728 def restore_console_title(self):
729 if not self.params.get('consoletitle', False):
730 return
731 if self.params.get('simulate'):
732 return
733 if compat_os_name != 'nt' and 'TERM' in os.environ:
734 # Restore the title from stack
735 self._write_string('\033[23;0t', self._screen_file)
736
737 def __enter__(self):
738 self.save_console_title()
739 return self
740
741 def __exit__(self, *args):
742 self.restore_console_title()
743
744 if self.params.get('cookiefile') is not None:
745 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
746
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the chained exc_info carried on the active
                    # exception (when present), then append the current
                    # traceback after it
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, passing along the most specific
            # exc_info available so the original cause is preserved
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
777
778 def to_screen(self, message, skip_eol=False):
779 """Print message to stdout if not in quiet mode"""
780 self.to_stdout(
781 message, skip_eol, quiet=self.params.get('quiet', False))
782
783 def report_warning(self, message, only_once=False):
784 '''
785 Print the message to stderr, it will be prefixed with 'WARNING:'
786 If stderr is a tty file the 'WARNING:' will be colored
787 '''
788 if self.params.get('logger') is not None:
789 self.params['logger'].warning(message)
790 else:
791 if self.params.get('no_warnings'):
792 return
793 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
794 _msg_header = '\033[0;33mWARNING:\033[0m'
795 else:
796 _msg_header = 'WARNING:'
797 warning_message = '%s %s' % (_msg_header, message)
798 self.to_stderr(warning_message, only_once)
799
800 def report_error(self, message, tb=None):
801 '''
802 Do the same as trouble, but prefixes the message with 'ERROR:', colored
803 in red if stderr is a tty file.
804 '''
805 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
806 _msg_header = '\033[0;31mERROR:\033[0m'
807 else:
808 _msg_header = 'ERROR:'
809 error_message = '%s %s' % (_msg_header, message)
810 self.trouble(error_message, tb)
811
812 def write_debug(self, message, only_once=False):
813 '''Log debug message or Print message to stderr'''
814 if not self.params.get('verbose', False):
815 return
816 message = '[debug] %s' % message
817 if self.params.get('logger'):
818 self.params['logger'].debug(message)
819 else:
820 self.to_stderr(message, only_once)
821
822 def report_file_already_downloaded(self, file_name):
823 """Report file has already been fully downloaded."""
824 try:
825 self.to_screen('[download] %s has already been downloaded' % file_name)
826 except UnicodeEncodeError:
827 self.to_screen('[download] The file has already been downloaded')
828
829 def report_file_delete(self, file_name):
830 """Report that existing file will be deleted."""
831 try:
832 self.to_screen('Deleting existing file %s' % file_name)
833 except UnicodeEncodeError:
834 self.to_screen('Deleting existing file')
835
836 def raise_no_formats(self, info, forced=False):
837 has_drm = info.get('__has_drm')
838 msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
839 expected = self.params.get('ignore_no_formats_error')
840 if forced or not expected:
841 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
842 expected=has_drm or expected)
843 else:
844 self.report_warning(msg)
845
846 def parse_outtmpl(self):
847 outtmpl_dict = self.params.get('outtmpl', {})
848 if not isinstance(outtmpl_dict, dict):
849 outtmpl_dict = {'default': outtmpl_dict}
850 outtmpl_dict.update({
851 k: v for k, v in DEFAULT_OUTTMPL.items()
852 if not outtmpl_dict.get(k)})
853 for key, val in outtmpl_dict.items():
854 if isinstance(val, bytes):
855 self.report_warning(
856 'Parameter outtmpl is bytes, but should be a unicode string. '
857 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
858 return outtmpl_dict
859
860 def get_output_path(self, dir_type='', filename=None):
861 paths = self.params.get('paths', {})
862 assert isinstance(paths, dict)
863 path = os.path.join(
864 expand_path(paths.get('home', '').strip()),
865 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
866 filename or '')
867
868 # Temporary fix for #4787
869 # 'Treat' all problem characters by passing filename through preferredencoding
870 # to workaround encoding issues with subprocess on python2 @ Windows
871 if sys.version_info < (3, 0) and sys.platform == 'win32':
872 path = encodeFilename(path, True).decode(preferredencoding())
873 return sanitize_path(path, force=self.params.get('windowsfilenames'))
874
875 @staticmethod
876 def _outtmpl_expandpath(outtmpl):
877 # expand_path translates '%%' into '%' and '$$' into '$'
878 # correspondingly that is not what we want since we need to keep
879 # '%%' intact for template dict substitution step. Working around
880 # with boundary-alike separator hack.
881 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
882 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
883
884 # outtmpl should be expand_path'ed before template dict substitution
885 # because meta fields may contain env variables we don't want to
886 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
887 # title "Hello $PATH", we don't want `$PATH` to be expanded.
888 return expand_path(outtmpl).replace(sep, '')
889
890 @staticmethod
891 def escape_outtmpl(outtmpl):
892 ''' Escape any remaining strings like %s, %abc% etc. '''
893 return re.sub(
894 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
895 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
896 outtmpl)
897
898 @classmethod
899 def validate_outtmpl(cls, outtmpl):
900 ''' @return None or Exception object '''
901 outtmpl = re.sub(
902 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'),
903 lambda mobj: f'{mobj.group(0)[:-1]}s',
904 cls._outtmpl_expandpath(outtmpl))
905 try:
906 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
907 return None
908 except ValueError as err:
909 return err
910
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
        # Internal bookkeeping keys must never leak into the template dict
        for key in ('__original_infodict', '__postprocessors'):
            info_dict.pop(key, None)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Maps the mangled keys produced by create_key() to their final values;
        # returned to the caller for the actual %-substitution.
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the decimal part below is unescaped, so it
        # matches any character — presumably r'\.\d+' was intended; confirm.
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Dotted path lookup into info_dict; a leading '.' is tolerated
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Evaluate one parsed %(...)-expression: traversal, negation,
            # arithmetic offsets and optional strftime formatting.
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Alternately consume an operator token and an operand token
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Operand was not a literal number: treat as field name
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'])

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            # Allow %(...)j to serialize sets and LazyLists as JSON arrays
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            # re.sub callback: evaluate one template field, store its value in
            # TMPL_DICT under a collision-proof mangled key, and emit the
            # rewritten %(mangled_key)fmt placeholder.
            if not outer_mobj.group('has_key'):
                return f'%{outer_mobj.group(0)}'
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            if mobj is None:
                value, default, mobj = None, na, {'fields': ''}
            else:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else na
                value = get_value(mobj)

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':
                value, fmt = ', '.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':
                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
            elif fmt[-1] == 'q':
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'c':
                value = str(value)
                # NOTE(review): str(value) can never be None, so this branch
                # looks unreachable; the None check presumably belongs before
                # the str() call — confirm against upstream.
                if value is None:
                    value, fmt = default, 's'
                else:
                    value = value[0]
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(mobj['fields'].split('.')[-1], value)

            # Mangle the key with NUL bytes (which cannot appear in templates)
            # so distinct field expressions never collide in TMPL_DICT
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1050
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        # Render the output template of the given type into a filename.
        # Returns None (after reporting) when the template is invalid.
        try:
            # Per-field sanitizer handed to prepare_outtmpl; ids get the
            # stricter is_id treatment
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
            outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
            outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
            filename = outtmpl % template_dict

            # Some template types (e.g. infojson/description) force an extension
            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                # NOTE(review): rsplit('.') has no maxsplit, so every dot in
                # the name splits; only the first segment is trimmed — confirm
                # this is the intended behaviour for dotted titles.
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
1080
1081 def prepare_filename(self, info_dict, dir_type='', warn=False):
1082 """Generate the output filename."""
1083
1084 filename = self._prepare_filename(info_dict, dir_type or 'default')
1085
1086 if warn:
1087 if not self.params.get('paths'):
1088 pass
1089 elif filename == '-':
1090 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1091 elif os.path.isabs(filename):
1092 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1093 if filename == '-' or not filename:
1094 return filename
1095
1096 return self.get_output_path(dir_type, filename)
1097
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """
        # Fall back to the id (or the literal 'video') for messages when the
        # entry carries no title
        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            # Returns a human-readable skip reason, or None to accept the entry
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            # User-supplied --match-filter callable runs last
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is not None:
                    return ret
            return None

        # Archived entries short-circuit the filters and use the
        # break_on_existing option; filtered ones use break_on_reject
        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
1154
1155 @staticmethod
1156 def add_extra_info(info_dict, extra_info):
1157 '''Set the keys from extra_info in info dict if they are missing'''
1158 for key, value in extra_info.items():
1159 info_dict.setdefault(key, value)
1160
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """
        # NOTE(review): extra_info={} is a shared mutable default, and
        # process_ie_result() calls setdefault() on the dict it receives, so
        # state could leak between calls — confirm this is intended.

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            # Cheap archive check on the URL-derived id, before any network
            # request is made
            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                # break skips the for-else below, so archived items yield None
                break
            return self.__extract_info(url, ie, download, extra_info, process)
        else:
            # Only reached when no extractor accepted the URL (loop exhausted
            # without break or return)
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1204
    def __handle_extraction_exceptions(func, handle_all_errors=True):
        # Decorator (defined in the class body, hence no *self* parameter)
        # that converts expected extraction failures into error reports so a
        # single failing URL does not abort the whole run.  The wrapped call
        # returns None after a handled exception.
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload:
                self.to_stderr('\r')
                self.report_warning('The download speed is below throttle limit. Re-extracting data')
                # Retry by re-invoking the wrapper; note there is no retry
                # cap, so persistent throttling recurses indefinitely
                return wrapper(self, *args, **kwargs)
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
                # Control-flow exceptions always propagate to the caller
                raise
            except Exception as e:
                # Everything else is swallowed only with --ignore-errors (and
                # only when the decoration requested handle_all_errors)
                if handle_all_errors and self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
1230
    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        # Run the chosen extractor on *url*; extraction exceptions are
        # reported (not propagated) by the decorator above.
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        # Preserve the URL the user originally passed across redirections
        if extra_info.get('original_url'):
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result
1249
1250 def add_default_extra_info(self, ie_result, ie, url):
1251 if url is not None:
1252 self.add_extra_info(ie_result, {
1253 'webpage_url': url,
1254 'original_url': url,
1255 'webpage_url_basename': url_basename(url),
1256 })
1257 if ie is not None:
1258 self.add_extra_info(ie_result, {
1259 'extractor': ie.IE_NAME,
1260 'extractor_key': ie.ie_key(),
1261 })
1262
1263 def process_ie_result(self, ie_result, download=True, extra_info=None):
1264 """
1265 Take the result of the ie(may be modified) and resolve all unresolved
1266 references (URLs, playlist items).
1267
1268 It will also download the videos if 'download'.
1269 Returns the resolved ie_result.
1270 """
1271 if extra_info is None:
1272 extra_info = {}
1273 result_type = ie_result.get('_type', 'video')
1274
1275 if result_type in ('url', 'url_transparent'):
1276 ie_result['url'] = sanitize_url(ie_result['url'])
1277 if ie_result.get('original_url'):
1278 extra_info.setdefault('original_url', ie_result['original_url'])
1279
1280 extract_flat = self.params.get('extract_flat', False)
1281 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1282 or extract_flat is True):
1283 info_copy = ie_result.copy()
1284 self.add_extra_info(info_copy, extra_info)
1285 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1286 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1287 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1288 return ie_result
1289
1290 if result_type == 'video':
1291 self.add_extra_info(ie_result, extra_info)
1292 ie_result = self.process_video_result(ie_result, download=download)
1293 additional_urls = (ie_result or {}).get('additional_urls')
1294 if additional_urls:
1295 # TODO: Improve MetadataParserPP to allow setting a list
1296 if isinstance(additional_urls, compat_str):
1297 additional_urls = [additional_urls]
1298 self.to_screen(
1299 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1300 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1301 ie_result['additional_entries'] = [
1302 self.extract_info(
1303 url, download, extra_info,
1304 force_generic_extractor=self.params.get('force_generic_extractor'))
1305 for url in additional_urls
1306 ]
1307 return ie_result
1308 elif result_type == 'url':
1309 # We have to add extra_info to the results because it may be
1310 # contained in a playlist
1311 return self.extract_info(
1312 ie_result['url'], download,
1313 ie_key=ie_result.get('ie_key'),
1314 extra_info=extra_info)
1315 elif result_type == 'url_transparent':
1316 # Use the information from the embedding page
1317 info = self.extract_info(
1318 ie_result['url'], ie_key=ie_result.get('ie_key'),
1319 extra_info=extra_info, download=False, process=False)
1320
1321 # extract_info may return None when ignoreerrors is enabled and
1322 # extraction failed with an error, don't crash and return early
1323 # in this case
1324 if not info:
1325 return info
1326
1327 force_properties = dict(
1328 (k, v) for k, v in ie_result.items() if v is not None)
1329 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1330 if f in force_properties:
1331 del force_properties[f]
1332 new_result = info.copy()
1333 new_result.update(force_properties)
1334
1335 # Extracted info may not be a video result (i.e.
1336 # info.get('_type', 'video') != video) but rather an url or
1337 # url_transparent. In such cases outer metadata (from ie_result)
1338 # should be propagated to inner one (info). For this to happen
1339 # _type of info should be overridden with url_transparent. This
1340 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1341 if new_result.get('_type') == 'url':
1342 new_result['_type'] = 'url_transparent'
1343
1344 return self.process_ie_result(
1345 new_result, download=download, extra_info=extra_info)
1346 elif result_type in ('playlist', 'multi_video'):
1347 # Protect from infinite recursion due to recursively nested playlists
1348 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1349 webpage_url = ie_result['webpage_url']
1350 if webpage_url in self._playlist_urls:
1351 self.to_screen(
1352 '[download] Skipping already downloaded playlist: %s'
1353 % ie_result.get('title') or ie_result.get('id'))
1354 return
1355
1356 self._playlist_level += 1
1357 self._playlist_urls.add(webpage_url)
1358 self._sanitize_thumbnails(ie_result)
1359 try:
1360 return self.__process_playlist(ie_result, download)
1361 finally:
1362 self._playlist_level -= 1
1363 if not self._playlist_level:
1364 self._playlist_urls.clear()
1365 elif result_type == 'compat_list':
1366 self.report_warning(
1367 'Extractor %s returned a compat_list result. '
1368 'It needs to be updated.' % ie_result.get('extractor'))
1369
1370 def _fixup(r):
1371 self.add_extra_info(r, {
1372 'extractor': ie_result['extractor'],
1373 'webpage_url': ie_result['webpage_url'],
1374 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1375 'extractor_key': ie_result['extractor_key'],
1376 })
1377 return r
1378 ie_result['entries'] = [
1379 self.process_ie_result(_fixup(r), download, extra_info)
1380 for r in ie_result['entries']
1381 ]
1382 return ie_result
1383 else:
1384 raise Exception('Invalid result type: %s' % result_type)
1385
    def _ensure_dir_exists(self, path):
        # Create the parent directory for *path*; failures are reported via
        # report_error and signalled by a falsy return value.
        return make_dir(path, self.report_error)
1388
1389 def __process_playlist(self, ie_result, download):
1390 # We process each entry in the playlist
1391 playlist = ie_result.get('title') or ie_result.get('id')
1392 self.to_screen('[download] Downloading playlist: %s' % playlist)
1393
1394 if 'entries' not in ie_result:
1395 raise EntryNotInPlaylist()
1396 incomplete_entries = bool(ie_result.get('requested_entries'))
1397 if incomplete_entries:
1398 def fill_missing_entries(entries, indexes):
1399 ret = [None] * max(*indexes)
1400 for i, entry in zip(indexes, entries):
1401 ret[i - 1] = entry
1402 return ret
1403 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1404
1405 playlist_results = []
1406
1407 playliststart = self.params.get('playliststart', 1)
1408 playlistend = self.params.get('playlistend')
1409 # For backwards compatibility, interpret -1 as whole list
1410 if playlistend == -1:
1411 playlistend = None
1412
1413 playlistitems_str = self.params.get('playlist_items')
1414 playlistitems = None
1415 if playlistitems_str is not None:
1416 def iter_playlistitems(format):
1417 for string_segment in format.split(','):
1418 if '-' in string_segment:
1419 start, end = string_segment.split('-')
1420 for item in range(int(start), int(end) + 1):
1421 yield int(item)
1422 else:
1423 yield int(string_segment)
1424 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1425
1426 ie_entries = ie_result['entries']
1427 msg = (
1428 'Downloading %d videos' if not isinstance(ie_entries, list)
1429 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1430 if not isinstance(ie_entries, (list, PagedList)):
1431 ie_entries = LazyList(ie_entries)
1432
1433 def get_entry(i):
1434 return YoutubeDL.__handle_extraction_exceptions(
1435 lambda self, i: ie_entries[i - 1],
1436 False
1437 )(self, i)
1438
1439 entries = []
1440 for i in playlistitems or itertools.count(playliststart):
1441 if playlistitems is None and playlistend is not None and playlistend < i:
1442 break
1443 entry = None
1444 try:
1445 entry = get_entry(i)
1446 if entry is None:
1447 raise EntryNotInPlaylist()
1448 except (IndexError, EntryNotInPlaylist):
1449 if incomplete_entries:
1450 raise EntryNotInPlaylist()
1451 elif not playlistitems:
1452 break
1453 entries.append(entry)
1454 try:
1455 if entry is not None:
1456 self._match_entry(entry, incomplete=True, silent=True)
1457 except (ExistingVideoReached, RejectedVideoReached):
1458 break
1459 ie_result['entries'] = entries
1460
1461 # Save playlist_index before re-ordering
1462 entries = [
1463 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1464 for i, entry in enumerate(entries, 1)
1465 if entry is not None]
1466 n_entries = len(entries)
1467
1468 if not playlistitems and (playliststart or playlistend):
1469 playlistitems = list(range(playliststart, playliststart + n_entries))
1470 ie_result['requested_entries'] = playlistitems
1471
1472 if self.params.get('allow_playlist_files', True):
1473 ie_copy = {
1474 'playlist': playlist,
1475 'playlist_id': ie_result.get('id'),
1476 'playlist_title': ie_result.get('title'),
1477 'playlist_uploader': ie_result.get('uploader'),
1478 'playlist_uploader_id': ie_result.get('uploader_id'),
1479 'playlist_index': 0,
1480 }
1481 ie_copy.update(dict(ie_result))
1482
1483 if self.params.get('writeinfojson', False):
1484 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1485 if not self._ensure_dir_exists(encodeFilename(infofn)):
1486 return
1487 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1488 self.to_screen('[info] Playlist metadata is already present')
1489 else:
1490 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1491 try:
1492 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1493 except (OSError, IOError):
1494 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1495
1496 # TODO: This should be passed to ThumbnailsConvertor if necessary
1497 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1498
1499 if self.params.get('writedescription', False):
1500 descfn = self.prepare_filename(ie_copy, 'pl_description')
1501 if not self._ensure_dir_exists(encodeFilename(descfn)):
1502 return
1503 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1504 self.to_screen('[info] Playlist description is already present')
1505 elif ie_result.get('description') is None:
1506 self.report_warning('There\'s no playlist description to write.')
1507 else:
1508 try:
1509 self.to_screen('[info] Writing playlist description to: ' + descfn)
1510 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1511 descfile.write(ie_result['description'])
1512 except (OSError, IOError):
1513 self.report_error('Cannot write playlist description file ' + descfn)
1514 return
1515
1516 if self.params.get('playlistreverse', False):
1517 entries = entries[::-1]
1518 if self.params.get('playlistrandom', False):
1519 random.shuffle(entries)
1520
1521 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1522
1523 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1524 failures = 0
1525 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1526 for i, entry_tuple in enumerate(entries, 1):
1527 playlist_index, entry = entry_tuple
1528 if 'playlist-index' in self.params.get('compat_options', []):
1529 playlist_index = playlistitems[i - 1] if playlistitems else i
1530 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1531 # This __x_forwarded_for_ip thing is a bit ugly but requires
1532 # minimal changes
1533 if x_forwarded_for:
1534 entry['__x_forwarded_for_ip'] = x_forwarded_for
1535 extra = {
1536 'n_entries': n_entries,
1537 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1538 'playlist_index': playlist_index,
1539 'playlist_autonumber': i,
1540 'playlist': playlist,
1541 'playlist_id': ie_result.get('id'),
1542 'playlist_title': ie_result.get('title'),
1543 'playlist_uploader': ie_result.get('uploader'),
1544 'playlist_uploader_id': ie_result.get('uploader_id'),
1545 'extractor': ie_result['extractor'],
1546 'webpage_url': ie_result['webpage_url'],
1547 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1548 'extractor_key': ie_result['extractor_key'],
1549 }
1550
1551 if self._match_entry(entry, incomplete=True) is not None:
1552 continue
1553
1554 entry_result = self.__process_iterable_entry(entry, download, extra)
1555 if not entry_result:
1556 failures += 1
1557 if failures >= max_failures:
1558 self.report_error(
1559 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1560 break
1561 # TODO: skip failed (empty) entries?
1562 playlist_results.append(entry_result)
1563 ie_result['entries'] = playlist_results
1564 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1565 return ie_result
1566
1567 @__handle_extraction_exceptions
1568 def __process_iterable_entry(self, entry, download, extra_info):
1569 return self.process_ie_result(
1570 entry, download=download, extra_info=extra_info)
1571
    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

        # Numeric comparisons on a fixed set of format fields
        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.fullmatch(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                # Non-integer value: try parsing as a filesize, with and
                # without an implicit trailing 'B'
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            # Fall back to string comparisons on arbitrary fields
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)\s*
                (?P<key>[a-zA-Z0-9._-]+)\s*
                (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
                (?P<value>[a-zA-Z0-9._-]+)\s*
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.fullmatch(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    # Leading '!' inverts the string operator
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            # Closure over m/op/comparison_value bound in whichever branch
            # matched above; a trailing '?' makes missing fields pass
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter
1632
1633 def _default_format_spec(self, info_dict, download=True):
1634
1635 def can_merge():
1636 merger = FFmpegMergerPP(self)
1637 return merger.available and merger.can_merge()
1638
1639 prefer_best = (
1640 not self.params.get('simulate')
1641 and download
1642 and (
1643 not can_merge()
1644 or info_dict.get('is_live', False)
1645 or self.outtmpl_dict['default'] == '-'))
1646 compat = (
1647 prefer_best
1648 or self.params.get('allow_multiple_audio_streams', False)
1649 or 'format-spec' in self.params.get('compat_opts', []))
1650
1651 return (
1652 'best/bestvideo+bestaudio' if prefer_best
1653 else 'bestvideo*+bestaudio/best' if not compat
1654 else 'bestvideo+bestaudio/best')
1655
    def build_format_selector(self, format_spec):
        """Compile the format-selection string *format_spec* (e.g.
        'bestvideo*+bestaudio/best') into a selector function.

        The returned callable takes a ctx dict with keys 'formats' and
        'incomplete_formats' and yields the selected format dict(s).
        Raises SyntaxError (via syntax_error) on a malformed spec.
        """
        def syntax_error(note, start):
            # Point a caret at the offending column of the spec.
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # AST node tags for the parsed selector tree.
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        check_formats = self.params.get('check_formats')

        def _parse_filter(tokens):
            # Collect the raw token strings of a [filter] up to the closing ']'.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Fuse adjacent NAME/NUMBER/other-OP tokens into one NAME.
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser producing a list of FormatSelector
            # nodes; the inside_* flags decide which operators terminate
            # the current nesting level.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            # Combine two selected formats (or already-merged groups) into
            # one synthetic format dict carrying 'requested_formats'.
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                # Keep only the first stream of each kind that is not allowed
                # multiple times; formats with neither audio nor video are dropped.
                # NOTE(review): pops from the list being enumerated, which
                # shifts later indices — confirm intended.
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                                break
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            # Container for the merged output: explicit option, else the
            # video's (or lone audio's) extension, else mkv.
            output_ext = self.params.get('merge_output_format')
            if not output_ext:
                if the_only_video:
                    output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']
                else:
                    output_ext = 'mkv'

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                'ext': output_ext,
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                })

            return new_dict

        def _check_formats(formats):
            # When --check-formats is enabled, test-download each format
            # into a temp file and yield only the ones that succeed.
            if not check_formats:
                yield from formats
                return
            for f in formats:
                self.to_screen('[info] Testing format %s' % f['format_id'])
                temp_file = tempfile.NamedTemporaryFile(
                    suffix='.tmp', delete=False,
                    dir=self.get_output_path('temp') or None)
                temp_file.close()
                try:
                    success, _ = self.dl(temp_file.name, f, test=True)
                except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                    success = False
                finally:
                    if os.path.exists(temp_file.name):
                        try:
                            os.remove(temp_file.name)
                        except OSError:
                            self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
                if success:
                    yield f
                else:
                    self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

        def _build_selector_function(selector):
            # Turn a FormatSelector tree (or list of trees) into a
            # generator function over the selection context.
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Deep-copy the ctx so each side filters independently.
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # Parse selectors like best, bv*, wa.2 etc.
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Anything else is an extension or an exact format_id.
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            # Apply any [filters] attached to this node before selecting.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        # Tokenize the spec with Python's own tokenizer, then parse.
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over tokens with one-token push-back support,
            # needed by _parse_format_selection's restore_last_token().
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1986
1987 def _calc_headers(self, info_dict):
1988 res = std_headers.copy()
1989
1990 add_headers = info_dict.get('http_headers')
1991 if add_headers:
1992 res.update(add_headers)
1993
1994 cookies = self._calc_cookies(info_dict)
1995 if cookies:
1996 res['Cookie'] = cookies
1997
1998 if 'X-Forwarded-For' not in res:
1999 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2000 if x_forwarded_for_ip:
2001 res['X-Forwarded-For'] = x_forwarded_for_ip
2002
2003 return res
2004
    def _calc_cookies(self, info_dict):
        """Return the 'Cookie' header value (or None) that the cookiejar
        would attach to a request for info_dict['url']."""
        # Build a throwaway request purely so the cookiejar can decide
        # which cookies apply to this URL.
        pr = sanitized_Request(info_dict['url'])
        self.cookiejar.add_cookie_header(pr)
        return pr.get_header('Cookie')
2009
2010 def _sanitize_thumbnails(self, info_dict):
2011 thumbnails = info_dict.get('thumbnails')
2012 if thumbnails is None:
2013 thumbnail = info_dict.get('thumbnail')
2014 if thumbnail:
2015 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2016 if thumbnails:
2017 thumbnails.sort(key=lambda t: (
2018 t.get('preference') if t.get('preference') is not None else -1,
2019 t.get('width') if t.get('width') is not None else -1,
2020 t.get('height') if t.get('height') is not None else -1,
2021 t.get('id') if t.get('id') is not None else '',
2022 t.get('url')))
2023
2024 def thumbnail_tester():
2025 if self.params.get('check_formats'):
2026 test_all = True
2027 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
2028 else:
2029 test_all = False
2030 to_screen = self.write_debug
2031
2032 def test_thumbnail(t):
2033 if not test_all and not t.get('_test_url'):
2034 return True
2035 to_screen('Testing thumbnail %s' % t['id'])
2036 try:
2037 self.urlopen(HEADRequest(t['url']))
2038 except network_exceptions as err:
2039 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2040 t['id'], t['url'], error_to_compat_str(err)))
2041 return False
2042 return True
2043
2044 return test_thumbnail
2045
2046 for i, t in enumerate(thumbnails):
2047 if t.get('id') is None:
2048 t['id'] = '%d' % i
2049 if t.get('width') and t.get('height'):
2050 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2051 t['url'] = sanitize_url(t['url'])
2052
2053 if self.params.get('check_formats') is not False:
2054 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2055 else:
2056 info_dict['thumbnails'] = thumbnails
2057
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video's info_dict, run format/subtitle
        selection and hand each chosen format to process_info.

        Mutates and returns info_dict, finally updated with the last
        selected format for backwards compatibility.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str, warning about the extractor bug.
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce all known numeric fields to int, warning on each.
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Keep 'thumbnail' and 'thumbnails' consistent with each other.
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive date fields (YYYYMMDD) from their timestamp counterparts.
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile live_status with the is_live/was_live booleans,
        # deriving whichever side is missing from the other.
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and fill in missing extensions.
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # DRM-protected formats are excluded unless explicitly allowed;
        # record whether any were dropped.
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]
        info_dict['__has_drm'] = len(info_dict.get('formats') or ['']) > len(formats)

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # Handle the various --list-* options; when any is active we only
        # print and return early instead of downloading.
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2319
2320 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2321 """Select the requested subtitles and their format"""
2322 available_subs = {}
2323 if normal_subtitles and self.params.get('writesubtitles'):
2324 available_subs.update(normal_subtitles)
2325 if automatic_captions and self.params.get('writeautomaticsub'):
2326 for lang, cap_info in automatic_captions.items():
2327 if lang not in available_subs:
2328 available_subs[lang] = cap_info
2329
2330 if (not self.params.get('writesubtitles') and not
2331 self.params.get('writeautomaticsub') or not
2332 available_subs):
2333 return None
2334
2335 all_sub_langs = available_subs.keys()
2336 if self.params.get('allsubtitles', False):
2337 requested_langs = all_sub_langs
2338 elif self.params.get('subtitleslangs', False):
2339 requested_langs = set()
2340 for lang in self.params.get('subtitleslangs'):
2341 if lang == 'all':
2342 requested_langs.update(all_sub_langs)
2343 continue
2344 discard = lang[0] == '-'
2345 if discard:
2346 lang = lang[1:]
2347 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2348 if discard:
2349 for lang in current_langs:
2350 requested_langs.discard(lang)
2351 else:
2352 requested_langs.update(current_langs)
2353 elif 'en' in available_subs:
2354 requested_langs = ['en']
2355 else:
2356 requested_langs = [list(all_sub_langs)[0]]
2357 if requested_langs:
2358 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2359
2360 formats_query = self.params.get('subtitlesformat', 'best')
2361 formats_preference = formats_query.split('/') if formats_query else []
2362 subs = {}
2363 for lang in requested_langs:
2364 formats = available_subs.get(lang)
2365 if formats is None:
2366 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2367 continue
2368 for ext in formats_preference:
2369 if ext == 'best':
2370 f = formats[-1]
2371 break
2372 matches = list(filter(lambda f: f['ext'] == ext, formats))
2373 if matches:
2374 f = matches[-1]
2375 break
2376 else:
2377 f = formats[-1]
2378 self.report_warning(
2379 'No subtitle format found matching "%s" for language %s, '
2380 'using %s' % (formats_query, lang, f['ext']))
2381 subs[lang] = f
2382 return subs
2383
2384 def __forced_printings(self, info_dict, filename, incomplete):
2385 def print_mandatory(field, actual_field=None):
2386 if actual_field is None:
2387 actual_field = field
2388 if (self.params.get('force%s' % field, False)
2389 and (not incomplete or info_dict.get(actual_field) is not None)):
2390 self.to_stdout(info_dict[actual_field])
2391
2392 def print_optional(field):
2393 if (self.params.get('force%s' % field, False)
2394 and info_dict.get(field) is not None):
2395 self.to_stdout(info_dict[field])
2396
2397 info_dict = info_dict.copy()
2398 if filename is not None:
2399 info_dict['filename'] = filename
2400 if info_dict.get('requested_formats') is not None:
2401 # For RTMP URLs, also include the playpath
2402 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2403 elif 'url' in info_dict:
2404 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2405
2406 if self.params.get('forceprint') or self.params.get('forcejson'):
2407 self.post_extract(info_dict)
2408 for tmpl in self.params.get('forceprint', []):
2409 if re.match(r'\w+$', tmpl):
2410 tmpl = '%({})s'.format(tmpl)
2411 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2412 self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)
2413
2414 print_mandatory('title')
2415 print_mandatory('id')
2416 print_mandatory('url', 'urls')
2417 print_optional('thumbnail')
2418 print_optional('description')
2419 print_optional('filename')
2420 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2421 self.to_stdout(formatSeconds(info_dict['duration']))
2422 print_mandatory('format')
2423
2424 if self.params.get('forcejson'):
2425 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2426
2427 def dl(self, name, info, subtitle=False, test=False):
2428 if not info.get('url'):
2429 self.raise_no_formats(info, True)
2430
2431 if test:
2432 verbose = self.params.get('verbose')
2433 params = {
2434 'test': True,
2435 'quiet': not verbose,
2436 'verbose': verbose,
2437 'noprogress': not verbose,
2438 'nopart': True,
2439 'skip_unavailable_fragments': False,
2440 'keep_fragments': False,
2441 'overwrites': True,
2442 '_no_ytdl_file': True,
2443 }
2444 else:
2445 params = self.params
2446 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2447 if not test:
2448 for ph in self._progress_hooks:
2449 fd.add_progress_hook(ph)
2450 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2451 self.write_debug('Invoking downloader on "%s"' % urls)
2452 new_info = dict(info)
2453 if new_info.get('http_headers') is None:
2454 new_info['http_headers'] = self._calc_headers(new_info)
2455 return fd.download(name, new_info, subtitle)
2456
2457 def process_info(self, info_dict):
2458 """Process a single resolved IE result."""
2459
2460 assert info_dict.get('_type', 'video') == 'video'
2461
2462 max_downloads = self.params.get('max_downloads')
2463 if max_downloads is not None:
2464 if self._num_downloads >= int(max_downloads):
2465 raise MaxDownloadsReached()
2466
2467 # TODO: backward compatibility, to be removed
2468 info_dict['fulltitle'] = info_dict['title']
2469
2470 if 'format' not in info_dict and 'ext' in info_dict:
2471 info_dict['format'] = info_dict['ext']
2472
2473 if self._match_entry(info_dict) is not None:
2474 return
2475
2476 self.post_extract(info_dict)
2477 self._num_downloads += 1
2478
2479 # info_dict['_filename'] needs to be set for backward compatibility
2480 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2481 temp_filename = self.prepare_filename(info_dict, 'temp')
2482 files_to_move = {}
2483
2484 # Forced printings
2485 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2486
2487 if self.params.get('simulate'):
2488 if self.params.get('force_write_download_archive', False):
2489 self.record_download_archive(info_dict)
2490
2491 # Do nothing else if in simulate mode
2492 return
2493
2494 if full_filename is None:
2495 return
2496
2497 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2498 return
2499 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2500 return
2501
2502 if self.params.get('writedescription', False):
2503 descfn = self.prepare_filename(info_dict, 'description')
2504 if not self._ensure_dir_exists(encodeFilename(descfn)):
2505 return
2506 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2507 self.to_screen('[info] Video description is already present')
2508 elif info_dict.get('description') is None:
2509 self.report_warning('There\'s no description to write.')
2510 else:
2511 try:
2512 self.to_screen('[info] Writing video description to: ' + descfn)
2513 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2514 descfile.write(info_dict['description'])
2515 except (OSError, IOError):
2516 self.report_error('Cannot write description file ' + descfn)
2517 return
2518
2519 if self.params.get('writeannotations', False):
2520 annofn = self.prepare_filename(info_dict, 'annotation')
2521 if not self._ensure_dir_exists(encodeFilename(annofn)):
2522 return
2523 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2524 self.to_screen('[info] Video annotations are already present')
2525 elif not info_dict.get('annotations'):
2526 self.report_warning('There are no annotations to write.')
2527 else:
2528 try:
2529 self.to_screen('[info] Writing video annotations to: ' + annofn)
2530 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2531 annofile.write(info_dict['annotations'])
2532 except (KeyError, TypeError):
2533 self.report_warning('There are no annotations to write.')
2534 except (OSError, IOError):
2535 self.report_error('Cannot write annotations file: ' + annofn)
2536 return
2537
2538 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2539 self.params.get('writeautomaticsub')])
2540
2541 if subtitles_are_requested and info_dict.get('requested_subtitles'):
2542 # subtitles download errors are already managed as troubles in relevant IE
2543 # that way it will silently go on when used with unsupporting IE
2544 subtitles = info_dict['requested_subtitles']
2545 # ie = self.get_info_extractor(info_dict['extractor_key'])
2546 for sub_lang, sub_info in subtitles.items():
2547 sub_format = sub_info['ext']
2548 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2549 sub_filename_final = subtitles_filename(
2550 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2551 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2552 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2553 sub_info['filepath'] = sub_filename
2554 files_to_move[sub_filename] = sub_filename_final
2555 else:
2556 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2557 if sub_info.get('data') is not None:
2558 try:
2559 # Use newline='' to prevent conversion of newline characters
2560 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2561 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2562 subfile.write(sub_info['data'])
2563 sub_info['filepath'] = sub_filename
2564 files_to_move[sub_filename] = sub_filename_final
2565 except (OSError, IOError):
2566 self.report_error('Cannot write subtitles file ' + sub_filename)
2567 return
2568 else:
2569 try:
2570 self.dl(sub_filename, sub_info.copy(), subtitle=True)
2571 sub_info['filepath'] = sub_filename
2572 files_to_move[sub_filename] = sub_filename_final
2573 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2574 self.report_warning('Unable to download subtitle for "%s": %s' %
2575 (sub_lang, error_to_compat_str(err)))
2576 continue
2577
2578 if self.params.get('writeinfojson', False):
2579 infofn = self.prepare_filename(info_dict, 'infojson')
2580 if not self._ensure_dir_exists(encodeFilename(infofn)):
2581 return
2582 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2583 self.to_screen('[info] Video metadata is already present')
2584 else:
2585 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2586 try:
2587 write_json_file(self.sanitize_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2588 except (OSError, IOError):
2589 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2590 return
2591 info_dict['__infojson_filename'] = infofn
2592
2593 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2594 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2595 thumb_filename = replace_extension(
2596 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2597 files_to_move[thumb_filename_temp] = thumb_filename
2598
2599 # Write internet shortcut files
2600 url_link = webloc_link = desktop_link = False
2601 if self.params.get('writelink', False):
2602 if sys.platform == "darwin": # macOS.
2603 webloc_link = True
2604 elif sys.platform.startswith("linux"):
2605 desktop_link = True
2606 else: # if sys.platform in ['win32', 'cygwin']:
2607 url_link = True
2608 if self.params.get('writeurllink', False):
2609 url_link = True
2610 if self.params.get('writewebloclink', False):
2611 webloc_link = True
2612 if self.params.get('writedesktoplink', False):
2613 desktop_link = True
2614
2615 if url_link or webloc_link or desktop_link:
2616 if 'webpage_url' not in info_dict:
2617 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2618 return
2619 ascii_url = iri_to_uri(info_dict['webpage_url'])
2620
2621 def _write_link_file(extension, template, newline, embed_filename):
2622 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2623 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2624 self.to_screen('[info] Internet shortcut is already present')
2625 else:
2626 try:
2627 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2628 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2629 template_vars = {'url': ascii_url}
2630 if embed_filename:
2631 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2632 linkfile.write(template % template_vars)
2633 except (OSError, IOError):
2634 self.report_error('Cannot write internet shortcut ' + linkfn)
2635 return False
2636 return True
2637
2638 if url_link:
2639 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2640 return
2641 if webloc_link:
2642 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2643 return
2644 if desktop_link:
2645 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2646 return
2647
2648 try:
2649 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2650 except PostProcessingError as err:
2651 self.report_error('Preprocessing: %s' % str(err))
2652 return
2653
2654 must_record_download_archive = False
2655 if self.params.get('skip_download', False):
2656 info_dict['filepath'] = temp_filename
2657 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2658 info_dict['__files_to_move'] = files_to_move
2659 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2660 else:
2661 # Download
2662 info_dict.setdefault('__postprocessors', [])
2663 try:
2664
2665 def existing_file(*filepaths):
2666 ext = info_dict.get('ext')
2667 final_ext = self.params.get('final_ext', ext)
2668 existing_files = []
2669 for file in orderedSet(filepaths):
2670 if final_ext != ext:
2671 converted = replace_extension(file, final_ext, ext)
2672 if os.path.exists(encodeFilename(converted)):
2673 existing_files.append(converted)
2674 if os.path.exists(encodeFilename(file)):
2675 existing_files.append(file)
2676
2677 if not existing_files or self.params.get('overwrites', False):
2678 for file in orderedSet(existing_files):
2679 self.report_file_delete(file)
2680 os.remove(encodeFilename(file))
2681 return None
2682
2683 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2684 return existing_files[0]
2685
2686 success = True
2687 if info_dict.get('requested_formats') is not None:
2688
2689 def compatible_formats(formats):
2690 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2691 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2692 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2693 if len(video_formats) > 2 or len(audio_formats) > 2:
2694 return False
2695
2696 # Check extension
2697 exts = set(format.get('ext') for format in formats)
2698 COMPATIBLE_EXTS = (
2699 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2700 set(('webm',)),
2701 )
2702 for ext_sets in COMPATIBLE_EXTS:
2703 if ext_sets.issuperset(exts):
2704 return True
2705 # TODO: Check acodec/vcodec
2706 return False
2707
2708 requested_formats = info_dict['requested_formats']
2709 old_ext = info_dict['ext']
2710 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2711 info_dict['ext'] = 'mkv'
2712 self.report_warning(
2713 'Requested formats are incompatible for merge and will be merged into mkv.')
2714 new_ext = info_dict['ext']
2715
2716 def correct_ext(filename, ext=new_ext):
2717 if filename == '-':
2718 return filename
2719 filename_real_ext = os.path.splitext(filename)[1][1:]
2720 filename_wo_ext = (
2721 os.path.splitext(filename)[0]
2722 if filename_real_ext in (old_ext, new_ext)
2723 else filename)
2724 return '%s.%s' % (filename_wo_ext, ext)
2725
2726 # Ensure filename always has a correct extension for successful merge
2727 full_filename = correct_ext(full_filename)
2728 temp_filename = correct_ext(temp_filename)
2729 dl_filename = existing_file(full_filename, temp_filename)
2730 info_dict['__real_download'] = False
2731
2732 _protocols = set(determine_protocol(f) for f in requested_formats)
2733 if len(_protocols) == 1: # All requested formats have same protocol
2734 info_dict['protocol'] = _protocols.pop()
2735 directly_mergable = FFmpegFD.can_merge_formats(info_dict)
2736 if dl_filename is not None:
2737 self.report_file_already_downloaded(dl_filename)
2738 elif (directly_mergable and get_suitable_downloader(
2739 info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
2740 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2741 success, real_download = self.dl(temp_filename, info_dict)
2742 info_dict['__real_download'] = real_download
2743 else:
2744 downloaded = []
2745 merger = FFmpegMergerPP(self)
2746 if self.params.get('allow_unplayable_formats'):
2747 self.report_warning(
2748 'You have requested merging of multiple formats '
2749 'while also allowing unplayable formats to be downloaded. '
2750 'The formats won\'t be merged to prevent data corruption.')
2751 elif not merger.available:
2752 self.report_warning(
2753 'You have requested merging of multiple formats but ffmpeg is not installed. '
2754 'The formats won\'t be merged.')
2755
2756 if temp_filename == '-':
2757 reason = ('using a downloader other than ffmpeg' if directly_mergable
2758 else 'but the formats are incompatible for simultaneous download' if merger.available
2759 else 'but ffmpeg is not installed')
2760 self.report_warning(
2761 f'You have requested downloading multiple formats to stdout {reason}. '
2762 'The formats will be streamed one after the other')
2763 fname = temp_filename
2764 for f in requested_formats:
2765 new_info = dict(info_dict)
2766 del new_info['requested_formats']
2767 new_info.update(f)
2768 if temp_filename != '-':
2769 fname = prepend_extension(
2770 correct_ext(temp_filename, new_info['ext']),
2771 'f%s' % f['format_id'], new_info['ext'])
2772 if not self._ensure_dir_exists(fname):
2773 return
2774 downloaded.append(fname)
2775 partial_success, real_download = self.dl(fname, new_info)
2776 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2777 success = success and partial_success
2778 if merger.available and not self.params.get('allow_unplayable_formats'):
2779 info_dict['__postprocessors'].append(merger)
2780 info_dict['__files_to_merge'] = downloaded
2781 # Even if there were no downloads, it is being merged only now
2782 info_dict['__real_download'] = True
2783 else:
2784 for file in downloaded:
2785 files_to_move[file] = None
2786 else:
2787 # Just a single file
2788 dl_filename = existing_file(full_filename, temp_filename)
2789 if dl_filename is None or dl_filename == temp_filename:
2790 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2791 # So we should try to resume the download
2792 success, real_download = self.dl(temp_filename, info_dict)
2793 info_dict['__real_download'] = real_download
2794 else:
2795 self.report_file_already_downloaded(dl_filename)
2796
2797 dl_filename = dl_filename or temp_filename
2798 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2799
2800 except network_exceptions as err:
2801 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2802 return
2803 except (OSError, IOError) as err:
2804 raise UnavailableVideoError(err)
2805 except (ContentTooShortError, ) as err:
2806 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2807 return
2808
2809 if success and full_filename != '-':
2810
2811 def fixup():
2812 do_fixup = True
2813 fixup_policy = self.params.get('fixup')
2814 vid = info_dict['id']
2815
2816 if fixup_policy in ('ignore', 'never'):
2817 return
2818 elif fixup_policy == 'warn':
2819 do_fixup = False
2820 elif fixup_policy != 'force':
2821 assert fixup_policy in ('detect_or_warn', None)
2822 if not info_dict.get('__real_download'):
2823 do_fixup = False
2824
2825 def ffmpeg_fixup(cndn, msg, cls):
2826 if not cndn:
2827 return
2828 if not do_fixup:
2829 self.report_warning(f'{vid}: {msg}')
2830 return
2831 pp = cls(self)
2832 if pp.available:
2833 info_dict['__postprocessors'].append(pp)
2834 else:
2835 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2836
2837 stretched_ratio = info_dict.get('stretched_ratio')
2838 ffmpeg_fixup(
2839 stretched_ratio not in (1, None),
2840 f'Non-uniform pixel ratio {stretched_ratio}',
2841 FFmpegFixupStretchedPP)
2842
2843 ffmpeg_fixup(
2844 (info_dict.get('requested_formats') is None
2845 and info_dict.get('container') == 'm4a_dash'
2846 and info_dict.get('ext') == 'm4a'),
2847 'writing DASH m4a. Only some players support this container',
2848 FFmpegFixupM4aPP)
2849
2850 downloader = (get_suitable_downloader(info_dict, self.params).__name__
2851 if 'protocol' in info_dict else None)
2852 ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2853 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2854 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2855
2856 fixup()
2857 try:
2858 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2859 except PostProcessingError as err:
2860 self.report_error('Postprocessing: %s' % str(err))
2861 return
2862 try:
2863 for ph in self._post_hooks:
2864 ph(info_dict['filepath'])
2865 except Exception as err:
2866 self.report_error('post hooks: %s' % str(err))
2867 return
2868 must_record_download_archive = True
2869
2870 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2871 self.record_download_archive(info_dict)
2872 max_downloads = self.params.get('max_downloads')
2873 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2874 raise MaxDownloadsReached()
2875
2876 def download(self, url_list):
2877 """Download a given list of URLs."""
2878 outtmpl = self.outtmpl_dict['default']
2879 if (len(url_list) > 1
2880 and outtmpl != '-'
2881 and '%' not in outtmpl
2882 and self.params.get('max_downloads') != 1):
2883 raise SameFileError(outtmpl)
2884
2885 for url in url_list:
2886 try:
2887 # It also downloads the videos
2888 res = self.extract_info(
2889 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2890 except UnavailableVideoError:
2891 self.report_error('unable to download video')
2892 except MaxDownloadsReached:
2893 self.to_screen('[info] Maximum number of downloads reached')
2894 raise
2895 except ExistingVideoReached:
2896 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
2897 raise
2898 except RejectedVideoReached:
2899 self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
2900 raise
2901 else:
2902 if self.params.get('dump_single_json', False):
2903 self.post_extract(res)
2904 self.to_stdout(json.dumps(self.sanitize_info(res)))
2905
2906 return self._download_retcode
2907
2908 def download_with_info_file(self, info_filename):
2909 with contextlib.closing(fileinput.FileInput(
2910 [info_filename], mode='r',
2911 openhook=fileinput.hook_encoded('utf-8'))) as f:
2912 # FileInput doesn't have a read method, we can't call json.load
2913 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2914 try:
2915 self.process_ie_result(info, download=True)
2916 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2917 webpage_url = info.get('webpage_url')
2918 if webpage_url is not None:
2919 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2920 return self.download([webpage_url])
2921 else:
2922 raise
2923 return self._download_retcode
2924
2925 @staticmethod
2926 def sanitize_info(info_dict, remove_private_keys=False):
2927 ''' Sanitize the infodict for converting to json '''
2928 if info_dict is None:
2929 return info_dict
2930 info_dict.setdefault('epoch', int(time.time()))
2931 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
2932 keep_keys = ['_type'], # Always keep this to facilitate load-info-json
2933 if remove_private_keys:
2934 remove_keys |= {
2935 'requested_formats', 'requested_subtitles', 'requested_entries',
2936 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2937 }
2938 empty_values = (None, {}, [], set(), tuple())
2939 reject = lambda k, v: k not in keep_keys and (
2940 k.startswith('_') or k in remove_keys or v in empty_values)
2941 else:
2942 reject = lambda k, v: k in remove_keys
2943 filter_fn = lambda obj: (
2944 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2945 else obj if not isinstance(obj, dict)
2946 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2947 return filter_fn(info_dict)
2948
2949 @staticmethod
2950 def filter_requested_info(info_dict, actually_filter=True):
2951 ''' Alias of sanitize_info for backward compatibility '''
2952 return YoutubeDL.sanitize_info(info_dict, actually_filter)
2953
2954 def run_pp(self, pp, infodict):
2955 files_to_delete = []
2956 if '__files_to_move' not in infodict:
2957 infodict['__files_to_move'] = {}
2958 files_to_delete, infodict = pp.run(infodict)
2959 if not files_to_delete:
2960 return infodict
2961
2962 if self.params.get('keepvideo', False):
2963 for f in files_to_delete:
2964 infodict['__files_to_move'].setdefault(f, '')
2965 else:
2966 for old_filename in set(files_to_delete):
2967 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2968 try:
2969 os.remove(encodeFilename(old_filename))
2970 except (IOError, OSError):
2971 self.report_warning('Unable to remove downloaded original file')
2972 if old_filename in infodict['__files_to_move']:
2973 del infodict['__files_to_move'][old_filename]
2974 return infodict
2975
2976 @staticmethod
2977 def post_extract(info_dict):
2978 def actual_post_extract(info_dict):
2979 if info_dict.get('_type') in ('playlist', 'multi_video'):
2980 for video_dict in info_dict.get('entries', {}):
2981 actual_post_extract(video_dict or {})
2982 return
2983
2984 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2985 extra = post_extractor().items()
2986 info_dict.update(extra)
2987 info_dict.pop('__post_extractor', None)
2988
2989 original_infodict = info_dict.get('__original_infodict') or {}
2990 original_infodict.update(extra)
2991 original_infodict.pop('__post_extractor', None)
2992
2993 actual_post_extract(info_dict or {})
2994
2995 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2996 info = dict(ie_info)
2997 info['__files_to_move'] = files_to_move or {}
2998 for pp in self._pps[key]:
2999 info = self.run_pp(pp, info)
3000 return info, info.pop('__files_to_move', None)
3001
3002 def post_process(self, filename, ie_info, files_to_move=None):
3003 """Run all the postprocessors on the given file."""
3004 info = dict(ie_info)
3005 info['filepath'] = filename
3006 info['__files_to_move'] = files_to_move or {}
3007
3008 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3009 info = self.run_pp(pp, info)
3010 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3011 del info['__files_to_move']
3012 for pp in self._pps['after_move']:
3013 info = self.run_pp(pp, info)
3014 return info
3015
3016 def _make_archive_id(self, info_dict):
3017 video_id = info_dict.get('id')
3018 if not video_id:
3019 return
3020 # Future-proof against any change in case
3021 # and backwards compatibility with prior versions
3022 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3023 if extractor is None:
3024 url = str_or_none(info_dict.get('url'))
3025 if not url:
3026 return
3027 # Try to find matching extractor for the URL and take its ie_key
3028 for ie in self._ies:
3029 if ie.suitable(url):
3030 extractor = ie.ie_key()
3031 break
3032 else:
3033 return
3034 return '%s %s' % (extractor.lower(), video_id)
3035
3036 def in_download_archive(self, info_dict):
3037 fn = self.params.get('download_archive')
3038 if fn is None:
3039 return False
3040
3041 vid_id = self._make_archive_id(info_dict)
3042 if not vid_id:
3043 return False # Incomplete video information
3044
3045 return vid_id in self.archive
3046
3047 def record_download_archive(self, info_dict):
3048 fn = self.params.get('download_archive')
3049 if fn is None:
3050 return
3051 vid_id = self._make_archive_id(info_dict)
3052 assert vid_id
3053 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3054 archive_file.write(vid_id + '\n')
3055 self.archive.add(vid_id)
3056
3057 @staticmethod
3058 def format_resolution(format, default='unknown'):
3059 if format.get('vcodec') == 'none':
3060 if format.get('acodec') == 'none':
3061 return 'images'
3062 return 'audio only'
3063 if format.get('resolution') is not None:
3064 return format['resolution']
3065 if format.get('width') and format.get('height'):
3066 res = '%dx%d' % (format['width'], format['height'])
3067 elif format.get('height'):
3068 res = '%sp' % format['height']
3069 elif format.get('width'):
3070 res = '%dx?' % format['width']
3071 else:
3072 res = default
3073 return res
3074
    def _format_note(self, fdict):
        """Build the free-form 'note' string for the legacy (old-style) format table.

        Pieces are appended in a fixed order; the separator handling
        (trailing space vs ', ') depends on what was already appended,
        so statement order here is significant.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' joins the codec name with the bitrate appended below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Bitrates known but codec unknown — label the video bitrate
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            # Audio bitrate known but codec unknown
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
3130
3131 def list_formats(self, info_dict):
3132 formats = info_dict.get('formats', [info_dict])
3133 new_format = (
3134 'list-formats' not in self.params.get('compat_opts', [])
3135 and self.params.get('listformats_table', True) is not False)
3136 if new_format:
3137 table = [
3138 [
3139 format_field(f, 'format_id'),
3140 format_field(f, 'ext'),
3141 self.format_resolution(f),
3142 format_field(f, 'fps', '%d'),
3143 '|',
3144 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3145 format_field(f, 'tbr', '%4dk'),
3146 shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3147 '|',
3148 format_field(f, 'vcodec', default='unknown').replace('none', ''),
3149 format_field(f, 'vbr', '%4dk'),
3150 format_field(f, 'acodec', default='unknown').replace('none', ''),
3151 format_field(f, 'abr', '%3dk'),
3152 format_field(f, 'asr', '%5dHz'),
3153 ', '.join(filter(None, (
3154 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3155 format_field(f, 'language', '[%s]'),
3156 format_field(f, 'format_note'),
3157 format_field(f, 'container', ignore=(None, f.get('ext'))),
3158 ))),
3159 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3160 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
3161 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
3162 else:
3163 table = [
3164 [
3165 format_field(f, 'format_id'),
3166 format_field(f, 'ext'),
3167 self.format_resolution(f),
3168 self._format_note(f)]
3169 for f in formats
3170 if f.get('preference') is None or f['preference'] >= -1000]
3171 header_line = ['format code', 'extension', 'resolution', 'note']
3172
3173 self.to_screen(
3174 '[info] Available formats for %s:' % info_dict['id'])
3175 self.to_stdout(render_table(
3176 header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3177
3178 def list_thumbnails(self, info_dict):
3179 thumbnails = list(info_dict.get('thumbnails'))
3180 if not thumbnails:
3181 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3182 return
3183
3184 self.to_screen(
3185 '[info] Thumbnails for %s:' % info_dict['id'])
3186 self.to_stdout(render_table(
3187 ['ID', 'width', 'height', 'URL'],
3188 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3189
3190 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3191 if not subtitles:
3192 self.to_screen('%s has no %s' % (video_id, name))
3193 return
3194 self.to_screen(
3195 'Available %s for %s:' % (name, video_id))
3196
3197 def _row(lang, formats):
3198 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3199 if len(set(names)) == 1:
3200 names = [] if names[0] == 'unknown' else names[:1]
3201 return [lang, ', '.join(names), ', '.join(exts)]
3202
3203 self.to_stdout(render_table(
3204 ['Language', 'Name', 'Formats'],
3205 [_row(lang, formats) for lang, formats in subtitles.items()],
3206 hideEmpty=True))
3207
3208 def urlopen(self, req):
3209 """ Start an HTTP download """
3210 if isinstance(req, compat_basestring):
3211 req = sanitized_Request(req)
3212 return self._opener.open(req, timeout=self._socket_timeout)
3213
    def print_debug_header(self):
        """Write the verbose '[debug] ...' header: encodings, version/source,
        git revision, Python/platform info, external tool versions, optional
        libraries, proxy map and (optionally) update/call-home info."""
        if not self.params.get('verbose'):
            return

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        # How this copy is being run: frozen exe, zip bundle, or plain source
        source = (
            '(exe)' if hasattr(sys, 'frozen')
            else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
            else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
            else '')
        self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        if self.params.get('compat_opts'):
            self._write_string(
                '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
        # Best-effort: report the git revision when running from a checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            # sys.exc_clear() only existed on Python 2; on Python 3 this
            # inner try makes the whole cleanup a silent no-op
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the PyPy version triple when available
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        # Versions of external programs yt-dlp can invoke
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        from .downloader.fragment import can_decrypt_frag
        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

        # Each entry is either a library name (truthy flag) or False, filtered out
        lib_str = ', '.join(sorted(filter(None, (
            can_decrypt_frag and 'pycryptodome',
            has_websockets and 'websockets',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            KEYRING_AVAILABLE and 'keyring',
        )))) or 'none'
        self._write_string('[debug] Optional libraries: %s\n' % lib_str)

        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # NOTE(review): everything below requires network access and hits
            # yt-dl.org; only runs when the user opted in via --call-home
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            return
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3308
3309 def _setup_opener(self):
3310 timeout_val = self.params.get('socket_timeout')
3311 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3312
3313 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3314 opts_cookiefile = self.params.get('cookiefile')
3315 opts_proxy = self.params.get('proxy')
3316
3317 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3318
3319 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3320 if opts_proxy is not None:
3321 if opts_proxy == '':
3322 proxies = {}
3323 else:
3324 proxies = {'http': opts_proxy, 'https': opts_proxy}
3325 else:
3326 proxies = compat_urllib_request.getproxies()
3327 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3328 if 'http' in proxies and 'https' not in proxies:
3329 proxies['https'] = proxies['http']
3330 proxy_handler = PerRequestProxyHandler(proxies)
3331
3332 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3333 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3334 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3335 redirect_handler = YoutubeDLRedirectHandler()
3336 data_handler = compat_urllib_request_DataHandler()
3337
3338 # When passing our own FileHandler instance, build_opener won't add the
3339 # default FileHandler and allows us to disable the file protocol, which
3340 # can be used for malicious purposes (see
3341 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3342 file_handler = compat_urllib_request.FileHandler()
3343
3344 def file_open(*args, **kwargs):
3345 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3346 file_handler.file_open = file_open
3347
3348 opener = compat_urllib_request.build_opener(
3349 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3350
3351 # Delete the default user-agent header, which would otherwise apply in
3352 # cases where our custom HTTP handler doesn't come into play
3353 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3354 opener.addheaders = []
3355 self._opener = opener
3356
3357 def encode(self, s):
3358 if isinstance(s, bytes):
3359 return s # Already encoded
3360
3361 try:
3362 return s.encode(self.get_encoding())
3363 except UnicodeEncodeError as err:
3364 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3365 raise
3366
3367 def get_encoding(self):
3368 encoding = self.params.get('encoding')
3369 if encoding is None:
3370 encoding = preferredencoding()
3371 return encoding
3372
3373 def _write_thumbnails(self, info_dict, filename): # return the extensions
3374 write_all = self.params.get('write_all_thumbnails', False)
3375 thumbnails = []
3376 if write_all or self.params.get('writethumbnail', False):
3377 thumbnails = info_dict.get('thumbnails') or []
3378 multiple = write_all and len(thumbnails) > 1
3379
3380 ret = []
3381 for t in thumbnails[::-1]:
3382 thumb_ext = determine_ext(t['url'], 'jpg')
3383 suffix = '%s.' % t['id'] if multiple else ''
3384 thumb_display_id = '%s ' % t['id'] if multiple else ''
3385 thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3386
3387 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3388 ret.append(suffix + thumb_ext)
3389 t['filepath'] = thumb_filename
3390 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3391 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3392 else:
3393 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3394 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3395 try:
3396 uf = self.urlopen(t['url'])
3397 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3398 shutil.copyfileobj(uf, thumbf)
3399 ret.append(suffix + thumb_ext)
3400 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3401 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3402 t['filepath'] = thumb_filename
3403 except network_exceptions as err:
3404 self.report_warning('Unable to download thumbnail "%s": %s' %
3405 (t['url'], error_to_compat_str(err)))
3406 if ret and not write_all:
3407 break
3408 return ret