yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30 from zipimport import zipimporter
  31
  32 from .compat import (
  33     compat_basestring,
  34     compat_get_terminal_size,
  35     compat_kwargs,
  36     compat_numeric_types,
  37     compat_os_name,
  38     compat_shlex_quote,
  39     compat_str,
  40     compat_tokenize_tokenize,
  41     compat_urllib_error,
  42     compat_urllib_request,
  43     compat_urllib_request_DataHandler,
  44 )
  45 from .cookies import load_cookies
  46 from .utils import (
  47     age_restricted,
  48     args_to_str,
  49     ContentTooShortError,
  50     date_from_str,
  51     DateRange,
  52     DEFAULT_OUTTMPL,
  53     determine_ext,
  54     determine_protocol,
  55     DOT_DESKTOP_LINK_TEMPLATE,
  56     DOT_URL_LINK_TEMPLATE,
  57     DOT_WEBLOC_LINK_TEMPLATE,
  58     DownloadError,
  59     encode_compat_str,
  60     encodeFilename,
  61     EntryNotInPlaylist,
  62     error_to_compat_str,
  63     ExistingVideoReached,
  64     expand_path,
  65     ExtractorError,
  66     float_or_none,
  67     format_bytes,
  68     format_field,
  69     STR_FORMAT_RE_TMPL,
  70     STR_FORMAT_TYPES,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     HEADRequest,
  74     int_or_none,
  75     iri_to_uri,
  76     ISO3166Utils,
  77     LazyList,
  78     locked_file,
  79     make_dir,
  80     make_HTTPS_handler,
  81     MaxDownloadsReached,
  82     network_exceptions,
  83     orderedSet,
  84     OUTTMPL_TYPES,
  85     PagedList,
  86     parse_filesize,
  87     PerRequestProxyHandler,
  88     platform_name,
  89     PostProcessingError,
  90     preferredencoding,
  91     prepend_extension,
  92     process_communicate_or_kill,
  93     register_socks_protocols,
  94     RejectedVideoReached,
  95     render_table,
  96     replace_extension,
  97     SameFileError,
  98     sanitize_filename,
  99     sanitize_path,
 100     sanitize_url,
 101     sanitized_Request,
 102     std_headers,
 103     str_or_none,
 104     strftime_or_none,
 105     subtitles_filename,
 106     ThrottledDownload,
 107     to_high_limit_path,
 108     traverse_obj,
 109     try_get,
 110     UnavailableVideoError,
 111     url_basename,
 112     variadic,
 113     version_tuple,
 114     write_json_file,
 115     write_string,
 116     YoutubeDLCookieProcessor,
 117     YoutubeDLHandler,
 118     YoutubeDLRedirectHandler,
 119 )
 120 from .cache import Cache
 121 from .extractor import (
 122     gen_extractor_classes,
 123     get_info_extractor,
 124     _LAZY_LOADER,
 125     _PLUGIN_CLASSES
 126 )
 127 from .extractor.openload import PhantomJSwrapper
 128 from .downloader import (
 129     FFmpegFD,
 130     get_suitable_downloader,
 131     shorten_protocol_name
 132 )
 133 from .downloader.rtmp import rtmpdump_version
 134 from .postprocessor import (
 135     get_postprocessor,
 136     FFmpegFixupDurationPP,
 137     FFmpegFixupM3u8PP,
 138     FFmpegFixupM4aPP,
 139     FFmpegFixupStretchedPP,
 140     FFmpegFixupTimestampPP,
 141     FFmpegMergerPP,
 142     FFmpegPostProcessor,
 143     MoveFilesAfterDownloadPP,
 144 )
 145 from .version import __version__
 146
 147 if compat_os_name == 'nt':
 148     import ctypes
 149
 150
 151 class YoutubeDL(object):
 152     """YoutubeDL class.
 153
 154     YoutubeDL objects are the ones responsible of downloading the
 155     actual video file and writing it to disk if the user has requested
 156     it, among some other tasks. In most cases there should be one per
 157     program. As, given a video URL, the downloader doesn't know how to
 158     extract all the needed information, task that InfoExtractors do, it
 159     has to pass the URL to one of them.
 160
 161     For this, YoutubeDL objects have a method that allows
 162     InfoExtractors to be registered in a given order. When it is passed
 163     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 164     finds that reports being able to handle it. The InfoExtractor extracts
 165     all the information about the video or videos the URL refers to, and
 166     YoutubeDL process the extracted information, possibly using a File
 167     Downloader to download the video.
 168
 169     YoutubeDL objects accept a lot of parameters. In order not to saturate
 170     the object constructor with arguments, it receives a dictionary of
 171     options instead. These options are available through the params
 172     attribute for the InfoExtractors to use. The YoutubeDL also
 173     registers itself as the downloader in charge for the InfoExtractors
 174     that are added to it, so this is a "mutual registration".
 175
 176     Available options:
 177
 178     username:          Username for authentication purposes.
 179     password:          Password for authentication purposes.
 180     videopassword:     Password for accessing a video.
 181     ap_mso:            Adobe Pass multiple-system operator identifier.
 182     ap_username:       Multiple-system operator account username.
 183     ap_password:       Multiple-system operator account password.
 184     usenetrc:          Use netrc for authentication instead.
 185     verbose:           Print additional info to stdout.
 186     quiet:             Do not print messages to stdout.
 187     no_warnings:       Do not print out anything for warnings.
 188     forceprint:        A list of templates to force print
 189     forceurl:          Force printing final URL. (Deprecated)
 190     forcetitle:        Force printing title. (Deprecated)
 191     forceid:           Force printing ID. (Deprecated)
 192     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 193     forcedescription:  Force printing description. (Deprecated)
 194     forcefilename:     Force printing final filename. (Deprecated)
 195     forceduration:     Force printing duration. (Deprecated)
 196     forcejson:         Force printing info_dict as JSON.
 197     dump_single_json:  Force printing the info_dict of the whole playlist
 198                        (or video) as a single JSON line.
 199     force_write_download_archive: Force writing download archive regardless
 200                        of 'skip_download' or 'simulate'.
 201     simulate:          Do not download the video files. If unset (or None),
 202                        simulate only if listsubtitles, listformats or list_thumbnails is used
 203     format:            Video format code. see "FORMAT SELECTION" for more details.
 204     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 205     ignore_no_formats_error: Ignore "No video formats" error. Usefull for
 206                        extracting metadata even if the video is not actually
 207                        available for download (experimental)
 208     format_sort:       How to sort the video formats. see "Sorting Formats"
 209                        for more details.
 210     format_sort_force: Force the given format_sort. see "Sorting Formats"
 211                        for more details.
 212     allow_multiple_video_streams:   Allow multiple video streams to be merged
 213                        into a single file
 214     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 215                        into a single file
 216     check_formats      Whether to test if the formats are downloadable.
 217                        Can be True (check all), False (check none)
 218                        or None (check only if requested by extractor)
 219     paths:             Dictionary of output paths. The allowed keys are 'home'
 220                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 221     outtmpl:           Dictionary of templates for output names. Allowed keys
 222                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 223                        For compatibility with youtube-dl, a single string can also be used
 224     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 225     restrictfilenames: Do not allow "&" and spaces in file names
 226     trim_file_name:    Limit length of filename (extension excluded)
 227     windowsfilenames:  Force the filenames to be windows compatible
 228     ignoreerrors:      Do not stop on download errors
 229                        (Default True when running yt-dlp,
 230                        but False when directly accessing YoutubeDL class)
 231     skip_playlist_after_errors: Number of allowed failures until the rest of
 232                        the playlist is skipped
 233     force_generic_extractor: Force downloader to use the generic extractor
 234     overwrites:        Overwrite all video and metadata files if True,
 235                        overwrite only non-video files if None
 236                        and don't overwrite any file if False
 237                        For compatibility with youtube-dl,
 238                        "nooverwrites" may also be used instead
 239     playliststart:     Playlist item to start at.
 240     playlistend:       Playlist item to end at.
 241     playlist_items:    Specific indices of playlist to download.
 242     playlistreverse:   Download playlist items in reverse order.
 243     playlistrandom:    Download playlist items in random order.
 244     matchtitle:        Download only matching titles.
 245     rejecttitle:       Reject downloads for matching titles.
 246     logger:            Log messages to a logging.Logger instance.
 247     logtostderr:       Log messages to stderr instead of stdout.
 248     writedescription:  Write the video description to a .description file
 249     writeinfojson:     Write the video description to a .info.json file
 250     clean_infojson:    Remove private fields from the infojson
 251     getcomments:       Extract video comments. This will not be written to disk
 252                        unless writeinfojson is also given
 253     writeannotations:  Write the video annotations to a .annotations.xml file
 254     writethumbnail:    Write the thumbnail image to a file
 255     allow_playlist_files: Whether to write playlists' description, infojson etc
 256                        also to disk when using the 'write*' options
 257     write_all_thumbnails:  Write all thumbnail formats to files
 258     writelink:         Write an internet shortcut file, depending on the
 259                        current platform (.url/.webloc/.desktop)
 260     writeurllink:      Write a Windows internet shortcut file (.url)
 261     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 262     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 263     writesubtitles:    Write the video subtitles to a file
 264     writeautomaticsub: Write the automatically generated subtitles to a file
 265     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 266                        Downloads all the subtitles of the video
 267                        (requires writesubtitles or writeautomaticsub)
 268     listsubtitles:     Lists all available subtitles for the video
 269     subtitlesformat:   The format code for subtitles
 270     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 271                        The list may contain "all" to refer to all the available
 272                        subtitles. The language can be prefixed with a "-" to
 273                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 274     keepvideo:         Keep the video file after post-processing
 275     daterange:         A DateRange object, download only if the upload_date is in the range.
 276     skip_download:     Skip the actual download of the video file
 277     cachedir:          Location of the cache files in the filesystem.
 278                        False to disable filesystem cache.
 279     noplaylist:        Download single video instead of a playlist if in doubt.
 280     age_limit:         An integer representing the user's age in years.
 281                        Unsuitable videos for the given age are skipped.
 282     min_views:         An integer representing the minimum view count the video
 283                        must have in order to not be skipped.
 284                        Videos without view count information are always
 285                        downloaded. None for no limit.
 286     max_views:         An integer representing the maximum view count.
 287                        Videos that are more popular than that are not
 288                        downloaded.
 289                        Videos without view count information are always
 290                        downloaded. None for no limit.
 291     download_archive:  File name of a file where all downloads are recorded.
 292                        Videos already present in the file are not downloaded
 293                        again.
 294     break_on_existing: Stop the download process after attempting to download a
 295                        file that is in the archive.
 296     break_on_reject:   Stop the download process when encountering a video that
 297                        has been filtered out.
 298     cookiefile:        File name where cookies should be read from and dumped to
 299     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 300                        name/path from where cookies are loaded.
 301                        Eg: ('chrome', ) or (vivaldi, 'default')
 302     nocheckcertificate:Do not verify SSL certificates
 303     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 304                        At the moment, this is only supported by YouTube.
 305     proxy:             URL of the proxy server to use
 306     geo_verification_proxy:  URL of the proxy to use for IP address verification
 307                        on geo-restricted sites.
 308     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 309     bidi_workaround:   Work around buggy terminals without bidirectional text
 310                        support, using fridibi
 311     debug_printtraffic:Print out sent and received HTTP traffic
 312     include_ads:       Download ads as well
 313     default_search:    Prepend this string if an input url is not valid.
 314                        'auto' for elaborate guessing
 315     encoding:          Use this encoding instead of the system-specified.
 316     extract_flat:      Do not resolve URLs, return the immediate result.
 317                        Pass in 'in_playlist' to only show this behavior for
 318                        playlist items.
 319     postprocessors:    A list of dictionaries, each with an entry
 320                        * key:  The name of the postprocessor. See
 321                                yt_dlp/postprocessor/__init__.py for a list.
 322                        * when: When to run the postprocessor. Can be one of
 323                                pre_process|before_dl|post_process|after_move.
 324                                Assumed to be 'post_process' if not given
 325     post_hooks:        A list of functions that get called as the final step
 326                        for each video file, after all postprocessors have been
 327                        called. The filename will be passed as the only argument.
 328     progress_hooks:    A list of functions that get called on download
 329                        progress, with a dictionary with the entries
 330                        * status: One of "downloading", "error", or "finished".
 331                                  Check this first and ignore unknown values.
 332                        * info_dict: The extracted info_dict
 333
 334                        If status is one of "downloading", or "finished", the
 335                        following properties may also be present:
 336                        * filename: The final filename (always present)
 337                        * tmpfilename: The filename we're currently writing to
 338                        * downloaded_bytes: Bytes on disk
 339                        * total_bytes: Size of the whole file, None if unknown
 340                        * total_bytes_estimate: Guess of the eventual file size,
 341                                                None if unavailable.
 342                        * elapsed: The number of seconds since download started.
 343                        * eta: The estimated time in seconds, None if unknown
 344                        * speed: The download speed in bytes/second, None if
 345                                 unknown
 346                        * fragment_index: The counter of the currently
 347                                          downloaded video fragment.
 348                        * fragment_count: The number of fragments (= individual
 349                                          files that will be merged)
 350
 351                        Progress hooks are guaranteed to be called at least once
 352                        (with status "finished") if the download is successful.
 353     merge_output_format: Extension to use when merging formats.
 354     final_ext:         Expected final extension; used to detect when the file was
 355                        already downloaded and converted. "merge_output_format" is
 356                        replaced by this extension when given
 357     fixup:             Automatically correct known faults of the file.
 358                        One of:
 359                        - "never": do nothing
 360                        - "warn": only emit a warning
 361                        - "detect_or_warn": check whether we can do anything
 362                                            about it, warn otherwise (default)
 363     source_address:    Client-side IP address to bind to.
 364     call_home:         Boolean, true iff we are allowed to contact the
 365                        yt-dlp servers for debugging. (BROKEN)
 366     sleep_interval_requests: Number of seconds to sleep between requests
 367                        during extraction
 368     sleep_interval:    Number of seconds to sleep before each download when
 369                        used alone or a lower bound of a range for randomized
 370                        sleep before each download (minimum possible number
 371                        of seconds to sleep) when used along with
 372                        max_sleep_interval.
 373     max_sleep_interval:Upper bound of a range for randomized sleep before each
 374                        download (maximum possible number of seconds to sleep).
 375                        Must only be used along with sleep_interval.
 376                        Actual sleep time will be a random float from range
 377                        [sleep_interval; max_sleep_interval].
 378     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 379     listformats:       Print an overview of available video formats and exit.
 380     list_thumbnails:   Print a table of all thumbnails and exit.
 381     match_filter:      A function that gets called with the info_dict of
 382                        every video.
 383                        If it returns a message, the video is ignored.
 384                        If it returns None, the video is downloaded.
 385                        match_filter_func in utils.py is one example for this.
 386     no_color:          Do not emit color codes in output.
 387     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 388                        HTTP header
 389     geo_bypass_country:
 390                        Two-letter ISO 3166-2 country code that will be used for
 391                        explicit geographic restriction bypassing via faking
 392                        X-Forwarded-For HTTP header
 393     geo_bypass_ip_block:
 394                        IP range in CIDR notation that will be used similarly to
 395                        geo_bypass_country
 396
 397     The following options determine which downloader is picked:
 398     external_downloader: A dictionary of protocol keys and the executable of the
 399                        external downloader to use for it. The allowed protocols
 400                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 401                        Set the value to 'native' to use the native downloader
 402     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 403                        or {'m3u8': 'ffmpeg'} instead.
 404                        Use the native HLS downloader instead of ffmpeg/avconv
 405                        if True, otherwise use ffmpeg/avconv if False, otherwise
 406                        use downloader suggested by extractor if None.
 407     compat_opts:       Compatibility options. See "Differences in default behavior".
 408                        The following options do not work when used through the API:
 409                        filename, abort-on-error, multistreams, no-live-chat,
 410                        no-clean-infojson, no-playlist-metafiles, no-keep-subs.
 411                        Refer __init__.py for their implementation
 412
 413     The following parameters are not used by YoutubeDL itself, they are used by
 414     the downloader (see yt_dlp/downloader/common.py):
 415     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 416     max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
 417     xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
 418
 419     The following options are used by the post processors:
 420     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 421                        otherwise prefer ffmpeg. (avconv support is deprecated)
 422     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 423                        to the binary or its containing directory.
 424     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 425                        and a list of additional command-line arguments for the
 426                        postprocessor/executable. The dict can also have "PP+EXE" keys
 427                        which are used when the given exe is used by the given PP.
 428                        Use 'default' as the name for arguments to passed to all PP
 429                        For compatibility with youtube-dl, a single list of args
 430                        can also be used
 431
 432     The following options are used by the extractors:
 433     extractor_retries: Number of times to retry for known errors
 434     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 435     hls_split_discontinuity: Split HLS playlists to different formats at
 436                        discontinuities such as ad breaks (default: False)
 437     extractor_args:    A dictionary of arguments to be passed to the extractors.
 438                        See "EXTRACTOR ARGUMENTS" for details.
 439                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 440     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 441                        If True (default), DASH manifests and related
 442                        data will be downloaded and processed by extractor.
 443                        You can reduce network I/O by disabling it if you don't
 444                        care about DASH. (only for youtube)
 445     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 446                        If True (default), HLS manifests and related
 447                        data will be downloaded and processed by extractor.
 448                        You can reduce network I/O by disabling it if you don't
 449                        care about HLS. (only for youtube)
 450     """
 451
 452     _NUMERIC_FIELDS = set((
 453         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 454         'timestamp', 'upload_year', 'upload_month', 'upload_day',
 455         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 456         'average_rating', 'comment_count', 'age_limit',
 457         'start_time', 'end_time',
 458         'chapter_number', 'season_number', 'episode_number',
 459         'track_number', 'disc_number', 'release_year',
 460         'playlist_index',
 461     ))
 462
 463     params = None
 464     _ies = []
 465     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 466     _printed_messages = set()
 467     _first_webpage_request = True
 468     _download_retcode = None
 469     _num_downloads = None
 470     _playlist_level = 0
 471     _playlist_urls = set()
 472     _screen_file = None
 473
 474     def __init__(self, params=None, auto_init=True):
 475         """Create a FileDownloader object with the given options."""
 476         if params is None:
 477             params = {}
 478         self._ies = []
 479         self._ies_instances = {}
 480         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 481         self._printed_messages = set()
 482         self._first_webpage_request = True
 483         self._post_hooks = []
 484         self._progress_hooks = []
 485         self._download_retcode = 0
 486         self._num_downloads = 0
 487         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 488         self._err_file = sys.stderr
 489         self.params = {
 490             # Default parameters
 491             'nocheckcertificate': False,
 492         }
 493         self.params.update(params)
 494         self.cache = Cache(self)
 495
 496         if sys.version_info < (3, 6):
 497             self.report_warning(
 498                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 499
 500         def check_deprecated(param, option, suggestion):
 501             if self.params.get(param) is not None:
 502                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 503                 return True
 504             return False
 505
 506         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 507             if self.params.get('geo_verification_proxy') is None:
 508                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 509
 510         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 511         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 512         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 513
 514         for msg in self.params.get('warnings', []):
 515             self.report_warning(msg)
 516
 517         if self.params.get('final_ext'):
 518             if self.params.get('merge_output_format'):
 519                 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
 520             self.params['merge_output_format'] = self.params['final_ext']
 521
 522         if self.params.get('overwrites') is None:
 523             self.params.pop('overwrites', None)
 524         elif self.params.get('nooverwrites') is not None:
 525             # nooverwrites was unnecessarily changed to overwrites
 526             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 527             # This ensures compatibility with both keys
 528             self.params['overwrites'] = not self.params['nooverwrites']
 529         else:
 530             self.params['nooverwrites'] = not self.params['overwrites']
 531
 532         if params.get('bidi_workaround', False):
 533             try:
 534                 import pty
 535                 master, slave = pty.openpty()
 536                 width = compat_get_terminal_size().columns
 537                 if width is None:
 538                     width_args = []
 539                 else:
 540                     width_args = ['-w', str(width)]
 541                 sp_kwargs = dict(
 542                     stdin=subprocess.PIPE,
 543                     stdout=slave,
 544                     stderr=self._err_file)
 545                 try:
 546                     self._output_process = subprocess.Popen(
 547                         ['bidiv'] + width_args, **sp_kwargs
 548                     )
 549                 except OSError:
 550                     self._output_process = subprocess.Popen(
 551                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 552                 self._output_channel = os.fdopen(master, 'rb')
 553             except OSError as ose:
 554                 if ose.errno == errno.ENOENT:
 555                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 556                 else:
 557                     raise
 558
 559         if (sys.platform != 'win32'
 560                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 561                 and not params.get('restrictfilenames', False)):
 562             # Unicode filesystem API will throw errors (#1474, #13027)
 563             self.report_warning(
 564                 'Assuming --restrict-filenames since file system encoding '
 565                 'cannot encode all characters. '
 566                 'Set the LC_ALL environment variable to fix this.')
 567             self.params['restrictfilenames'] = True
 568
 569         self.outtmpl_dict = self.parse_outtmpl()
 570
 571         # Creating format selector here allows us to catch syntax errors before the extraction
 572         self.format_selector = (
 573             None if self.params.get('format') is None
 574             else self.build_format_selector(self.params['format']))
 575
 576         self._setup_opener()
 577
 578         """Preload the archive, if any is specified"""
 579         def preload_download_archive(fn):
 580             if fn is None:
 581                 return False
 582             self.write_debug('Loading archive file %r\n' % fn)
 583             try:
 584                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 585                     for line in archive_file:
 586                         self.archive.add(line.strip())
 587             except IOError as ioe:
 588                 if ioe.errno != errno.ENOENT:
 589                     raise
 590                 return False
 591             return True
 592
 593         self.archive = set()
 594         preload_download_archive(self.params.get('download_archive'))
 595
 596         if auto_init:
 597             self.print_debug_header()
 598             self.add_default_info_extractors()
 599
 600         for pp_def_raw in self.params.get('postprocessors', []):
 601             pp_def = dict(pp_def_raw)
 602             when = pp_def.pop('when', 'post_process')
 603             pp_class = get_postprocessor(pp_def.pop('key'))
 604             pp = pp_class(self, **compat_kwargs(pp_def))
 605             self.add_post_processor(pp, when=when)
 606
 607         for ph in self.params.get('post_hooks', []):
 608             self.add_post_hook(ph)
 609
 610         for ph in self.params.get('progress_hooks', []):
 611             self.add_progress_hook(ph)
 612
 613         register_socks_protocols()
 614
 615     def warn_if_short_id(self, argv):
 616         # short YouTube ID starting with dash?
 617         idxs = [
 618             i for i, a in enumerate(argv)
 619             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 620         if idxs:
 621             correct_argv = (
 622                 ['yt-dlp']
 623                 + [a for i, a in enumerate(argv) if i not in idxs]
 624                 + ['--'] + [argv[i] for i in idxs]
 625             )
 626             self.report_warning(
 627                 'Long argument string detected. '
 628                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 629                 args_to_str(correct_argv))
 630
 631     def add_info_extractor(self, ie):
 632         """Add an InfoExtractor object to the end of the list."""
 633         self._ies.append(ie)
 634         if not isinstance(ie, type):
 635             self._ies_instances[ie.ie_key()] = ie
 636             ie.set_downloader(self)
 637
 638     def get_info_extractor(self, ie_key):
 639         """
 640         Get an instance of an IE with name ie_key, it will try to get one from
 641         the _ies list, if there's no instance it will create a new one and add
 642         it to the extractor list.
 643         """
 644         ie = self._ies_instances.get(ie_key)
 645         if ie is None:
 646             ie = get_info_extractor(ie_key)()
 647             self.add_info_extractor(ie)
 648         return ie
 649
 650     def add_default_info_extractors(self):
 651         """
 652         Add the InfoExtractors returned by gen_extractors to the end of the list
 653         """
 654         for ie in gen_extractor_classes():
 655             self.add_info_extractor(ie)
 656
 657     def add_post_processor(self, pp, when='post_process'):
 658         """Add a PostProcessor object to the end of the chain."""
 659         self._pps[when].append(pp)
 660         pp.set_downloader(self)
 661
 662     def add_post_hook(self, ph):
 663         """Add the post hook"""
 664         self._post_hooks.append(ph)
 665
 666     def add_progress_hook(self, ph):
 667         """Add the progress hook (currently only for the file downloader)"""
 668         self._progress_hooks.append(ph)
 669
 670     def _bidi_workaround(self, message):
 671         if not hasattr(self, '_output_channel'):
 672             return message
 673
 674         assert hasattr(self, '_output_process')
 675         assert isinstance(message, compat_str)
 676         line_count = message.count('\n') + 1
 677         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 678         self._output_process.stdin.flush()
 679         res = ''.join(self._output_channel.readline().decode('utf-8')
 680                       for _ in range(line_count))
 681         return res[:-len('\n')]
 682
 683     def _write_string(self, message, out=None, only_once=False):
 684         if only_once:
 685             if message in self._printed_messages:
 686                 return
 687             self._printed_messages.add(message)
 688         write_string(message, out=out, encoding=self.params.get('encoding'))
 689
 690     def to_stdout(self, message, skip_eol=False, quiet=False):
 691         """Print message to stdout"""
 692         if self.params.get('logger'):
 693             self.params['logger'].debug(message)
 694         elif not quiet or self.params.get('verbose'):
 695             self._write_string(
 696                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 697                 self._err_file if quiet else self._screen_file)
 698
 699     def to_stderr(self, message, only_once=False):
 700         """Print message to stderr"""
 701         assert isinstance(message, compat_str)
 702         if self.params.get('logger'):
 703             self.params['logger'].error(message)
 704         else:
 705             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 706
 707     def to_console_title(self, message):
 708         if not self.params.get('consoletitle', False):
 709             return
 710         if compat_os_name == 'nt':
 711             if ctypes.windll.kernel32.GetConsoleWindow():
 712                 # c_wchar_p() might not be necessary if `message` is
 713                 # already of type unicode()
 714                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 715         elif 'TERM' in os.environ:
 716             self._write_string('\033]0;%s\007' % message, self._screen_file)
 717
 718     def save_console_title(self):
 719         if not self.params.get('consoletitle', False):
 720             return
 721         if self.params.get('simulate'):
 722             return
 723         if compat_os_name != 'nt' and 'TERM' in os.environ:
 724             # Save the title on stack
 725             self._write_string('\033[22;0t', self._screen_file)
 726
 727     def restore_console_title(self):
 728         if not self.params.get('consoletitle', False):
 729             return
 730         if self.params.get('simulate'):
 731             return
 732         if compat_os_name != 'nt' and 'TERM' in os.environ:
 733             # Restore the title from stack
 734             self._write_string('\033[23;0t', self._screen_file)
 735
 736     def __enter__(self):
 737         self.save_console_title()
 738         return self
 739
 740     def __exit__(self, *args):
 741         self.restore_console_title()
 742
 743         if self.params.get('cookiefile') is not None:
 744             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 745
 746     def trouble(self, message=None, tb=None):
 747         """Determine action to take when a download problem appears.
 748
 749         Depending on if the downloader has been configured to ignore
 750         download errors or not, this method may throw an exception or
 751         not when errors are found, after printing the message.
 752
 753         tb, if given, is additional traceback information.
 754         """
 755         if message is not None:
 756             self.to_stderr(message)
 757         if self.params.get('verbose'):
 758             if tb is None:
 759                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 760                     tb = ''
 761                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 762                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 763                     tb += encode_compat_str(traceback.format_exc())
 764                 else:
 765                     tb_data = traceback.format_list(traceback.extract_stack())
 766                     tb = ''.join(tb_data)
 767             if tb:
 768                 self.to_stderr(tb)
 769         if not self.params.get('ignoreerrors', False):
 770             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 771                 exc_info = sys.exc_info()[1].exc_info
 772             else:
 773                 exc_info = sys.exc_info()
 774             raise DownloadError(message, exc_info)
 775         self._download_retcode = 1
 776
 777     def to_screen(self, message, skip_eol=False):
 778         """Print message to stdout if not in quiet mode"""
 779         self.to_stdout(
 780             message, skip_eol, quiet=self.params.get('quiet', False))
 781
 782     def report_warning(self, message, only_once=False):
 783         '''
 784         Print the message to stderr, it will be prefixed with 'WARNING:'
 785         If stderr is a tty file the 'WARNING:' will be colored
 786         '''
 787         if self.params.get('logger') is not None:
 788             self.params['logger'].warning(message)
 789         else:
 790             if self.params.get('no_warnings'):
 791                 return
 792             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 793                 _msg_header = '\033[0;33mWARNING:\033[0m'
 794             else:
 795                 _msg_header = 'WARNING:'
 796             warning_message = '%s %s' % (_msg_header, message)
 797             self.to_stderr(warning_message, only_once)
 798
 799     def report_error(self, message, tb=None):
 800         '''
 801         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 802         in red if stderr is a tty file.
 803         '''
 804         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 805             _msg_header = '\033[0;31mERROR:\033[0m'
 806         else:
 807             _msg_header = 'ERROR:'
 808         error_message = '%s %s' % (_msg_header, message)
 809         self.trouble(error_message, tb)
 810
 811     def write_debug(self, message, only_once=False):
 812         '''Log debug message or Print message to stderr'''
 813         if not self.params.get('verbose', False):
 814             return
 815         message = '[debug] %s' % message
 816         if self.params.get('logger'):
 817             self.params['logger'].debug(message)
 818         else:
 819             self.to_stderr(message, only_once)
 820
 821     def report_file_already_downloaded(self, file_name):
 822         """Report file has already been fully downloaded."""
 823         try:
 824             self.to_screen('[download] %s has already been downloaded' % file_name)
 825         except UnicodeEncodeError:
 826             self.to_screen('[download] The file has already been downloaded')
 827
 828     def report_file_delete(self, file_name):
 829         """Report that existing file will be deleted."""
 830         try:
 831             self.to_screen('Deleting existing file %s' % file_name)
 832         except UnicodeEncodeError:
 833             self.to_screen('Deleting existing file')
 834
 835     def parse_outtmpl(self):
 836         outtmpl_dict = self.params.get('outtmpl', {})
 837         if not isinstance(outtmpl_dict, dict):
 838             outtmpl_dict = {'default': outtmpl_dict}
 839         outtmpl_dict.update({
 840             k: v for k, v in DEFAULT_OUTTMPL.items()
 841             if not outtmpl_dict.get(k)})
 842         for key, val in outtmpl_dict.items():
 843             if isinstance(val, bytes):
 844                 self.report_warning(
 845                     'Parameter outtmpl is bytes, but should be a unicode string. '
 846                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 847         return outtmpl_dict
 848
 849     def get_output_path(self, dir_type='', filename=None):
 850         paths = self.params.get('paths', {})
 851         assert isinstance(paths, dict)
 852         path = os.path.join(
 853             expand_path(paths.get('home', '').strip()),
 854             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 855             filename or '')
 856
 857         # Temporary fix for #4787
 858         # 'Treat' all problem characters by passing filename through preferredencoding
 859         # to workaround encoding issues with subprocess on python2 @ Windows
 860         if sys.version_info < (3, 0) and sys.platform == 'win32':
 861             path = encodeFilename(path, True).decode(preferredencoding())
 862         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 863
 864     @staticmethod
 865     def _outtmpl_expandpath(outtmpl):
 866         # expand_path translates '%%' into '%' and '$$' into '$'
 867         # correspondingly that is not what we want since we need to keep
 868         # '%%' intact for template dict substitution step. Working around
 869         # with boundary-alike separator hack.
 870         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 871         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 872
 873         # outtmpl should be expand_path'ed before template dict substitution
 874         # because meta fields may contain env variables we don't want to
 875         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 876         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 877         return expand_path(outtmpl).replace(sep, '')
 878
 879     @staticmethod
 880     def escape_outtmpl(outtmpl):
 881         ''' Escape any remaining strings like %s, %abc% etc. '''
 882         return re.sub(
 883             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 884             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 885             outtmpl)
 886
 887     @classmethod
 888     def validate_outtmpl(cls, outtmpl):
 889         ''' @return None or Exception object '''
 890         outtmpl = re.sub(
 891             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'),
 892             lambda mobj: f'{mobj.group(0)[:-1]}s',
 893             cls._outtmpl_expandpath(outtmpl))
 894         try:
 895             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
 896             return None
 897         except ValueError as err:
 898             return err
 899
 900     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 901         """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
 902         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
 903
 904         info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
 905         for key in ('__original_infodict', '__postprocessors'):
 906             info_dict.pop(key, None)
 907         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 908             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
 909             if info_dict.get('duration', None) is not None
 910             else None)
 911         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 912         if info_dict.get('resolution') is None:
 913             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
 914
 915         # For fields playlist_index and autonumber convert all occurrences
 916         # of %(field)s to %(field)0Nd for backward compatibility
 917         field_size_compat_map = {
 918             'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
 919             'autonumber': self.params.get('autonumber_size') or 5,
 920         }
 921
 922         TMPL_DICT = {}
 923         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]'))
 924         MATH_FUNCTIONS = {
 925             '+': float.__add__,
 926             '-': float.__sub__,
 927         }
 928         # Field is of the form key1.key2...
 929         # where keys (except first) can be string, int or slice
 930         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
 931         MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
 932         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
 933         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
 934             (?P<negate>-)?
 935             (?P<fields>{field})
 936             (?P<maths>(?:{math_op}{math_field})*)
 937             (?:>(?P<strf_format>.+?))?
 938             (?:\|(?P<default>.*?))?
 939             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
 940
 941         def _traverse_infodict(k):
 942             k = k.split('.')
 943             if k[0] == '':
 944                 k.pop(0)
 945             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
 946
 947         def get_value(mdict):
 948             # Object traversal
 949             value = _traverse_infodict(mdict['fields'])
 950             # Negative
 951             if mdict['negate']:
 952                 value = float_or_none(value)
 953                 if value is not None:
 954                     value *= -1
 955             # Do maths
 956             offset_key = mdict['maths']
 957             if offset_key:
 958                 value = float_or_none(value)
 959                 operator = None
 960                 while offset_key:
 961                     item = re.match(
 962                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
 963                         offset_key).group(0)
 964                     offset_key = offset_key[len(item):]
 965                     if operator is None:
 966                         operator = MATH_FUNCTIONS[item]
 967                         continue
 968                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
 969                     offset = float_or_none(item)
 970                     if offset is None:
 971                         offset = float_or_none(_traverse_infodict(item))
 972                     try:
 973                         value = operator(value, multiplier * offset)
 974                     except (TypeError, ZeroDivisionError):
 975                         return None
 976                     operator = None
 977             # Datetime formatting
 978             if mdict['strf_format']:
 979                 value = strftime_or_none(value, mdict['strf_format'])
 980
 981             return value
 982
 983         na = self.params.get('outtmpl_na_placeholder', 'NA')
 984
 985         def _dumpjson_default(obj):
 986             if isinstance(obj, (set, LazyList)):
 987                 return list(obj)
 988             raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
 989
 990         def create_key(outer_mobj):
 991             if not outer_mobj.group('has_key'):
 992                 return f'%{outer_mobj.group(0)}'
 993             key = outer_mobj.group('key')
 994             mobj = re.match(INTERNAL_FORMAT_RE, key)
 995             if mobj is None:
 996                 value, default, mobj = None, na, {'fields': ''}
 997             else:
 998                 mobj = mobj.groupdict()
 999                 default = mobj['default'] if mobj['default'] is not None else na
1000                 value = get_value(mobj)
1001
1002             fmt = outer_mobj.group('format')
1003             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1004                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1005
1006             value = default if value is None else value
1007
1008             str_fmt = f'{fmt[:-1]}s'
1009             if fmt[-1] == 'l':
1010                 value, fmt = ', '.join(variadic(value)), str_fmt
1011             elif fmt[-1] == 'j':
1012                 value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
1013             elif fmt[-1] == 'q':
1014                 value, fmt = compat_shlex_quote(str(value)), str_fmt
1015             elif fmt[-1] == 'c':
1016                 value = str(value)
1017                 if value is None:
1018                     value, fmt = default, 's'
1019                 else:
1020                     value = value[0]
1021             elif fmt[-1] not in 'rs':  # numeric
1022                 value = float_or_none(value)
1023                 if value is None:
1024                     value, fmt = default, 's'
1025
1026             if sanitize:
1027                 if fmt[-1] == 'r':
1028                     # If value is an object, sanitize might convert it to a string
1029                     # So we convert it to repr first
1030                     value, fmt = repr(value), str_fmt
1031                 if fmt[-1] in 'csr':
1032                     value = sanitize(mobj['fields'].split('.')[-1], value)
1033
1034             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1035             TMPL_DICT[key] = value
1036             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1037
1038         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1039
1040     def _prepare_filename(self, info_dict, tmpl_type='default'):
1041         try:
1042             sanitize = lambda k, v: sanitize_filename(
1043                 compat_str(v),
1044                 restricted=self.params.get('restrictfilenames'),
1045                 is_id=(k == 'id' or k.endswith('_id')))
1046             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
1047             outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
1048             outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
1049             filename = outtmpl % template_dict
1050
1051             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1052             if force_ext is not None:
1053                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1054
1055             # https://github.com/blackjack4494/youtube-dlc/issues/85
1056             trim_file_name = self.params.get('trim_file_name', False)
1057             if trim_file_name:
1058                 fn_groups = filename.rsplit('.')
1059                 ext = fn_groups[-1]
1060                 sub_ext = ''
1061                 if len(fn_groups) > 2:
1062                     sub_ext = fn_groups[-2]
1063                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1064
1065             return filename
1066         except ValueError as err:
1067             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1068             return None
1069
1070     def prepare_filename(self, info_dict, dir_type='', warn=False):
1071         """Generate the output filename."""
1072
1073         filename = self._prepare_filename(info_dict, dir_type or 'default')
1074
1075         if warn:
1076             if not self.params.get('paths'):
1077                 pass
1078             elif filename == '-':
1079                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1080             elif os.path.isabs(filename):
1081                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1082         if filename == '-' or not filename:
1083             return filename
1084
1085         return self.get_output_path(dir_type, filename)
1086
1087     def _match_entry(self, info_dict, incomplete=False, silent=False):
1088         """ Returns None if the file should be downloaded """
1089
1090         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1091
1092         def check_filter():
1093             if 'title' in info_dict:
1094                 # This can happen when we're just evaluating the playlist
1095                 title = info_dict['title']
1096                 matchtitle = self.params.get('matchtitle', False)
1097                 if matchtitle:
1098                     if not re.search(matchtitle, title, re.IGNORECASE):
1099                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1100                 rejecttitle = self.params.get('rejecttitle', False)
1101                 if rejecttitle:
1102                     if re.search(rejecttitle, title, re.IGNORECASE):
1103                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1104             date = info_dict.get('upload_date')
1105             if date is not None:
1106                 dateRange = self.params.get('daterange', DateRange())
1107                 if date not in dateRange:
1108                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1109             view_count = info_dict.get('view_count')
1110             if view_count is not None:
1111                 min_views = self.params.get('min_views')
1112                 if min_views is not None and view_count < min_views:
1113                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1114                 max_views = self.params.get('max_views')
1115                 if max_views is not None and view_count > max_views:
1116                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1117             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1118                 return 'Skipping "%s" because it is age restricted' % video_title
1119
1120             if not incomplete:
1121                 match_filter = self.params.get('match_filter')
1122                 if match_filter is not None:
1123                     ret = match_filter(info_dict)
1124                     if ret is not None:
1125                         return ret
1126             return None
1127
1128         if self.in_download_archive(info_dict):
1129             reason = '%s has already been recorded in the archive' % video_title
1130             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1131         else:
1132             reason = check_filter()
1133             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1134         if reason is not None:
1135             if not silent:
1136                 self.to_screen('[download] ' + reason)
1137             if self.params.get(break_opt, False):
1138                 raise break_err()
1139         return reason
1140
1141     @staticmethod
1142     def add_extra_info(info_dict, extra_info):
1143         '''Set the keys from extra_info in info dict if they are missing'''
1144         for key, value in extra_info.items():
1145             info_dict.setdefault(key, value)
1146
1147     def extract_info(self, url, download=True, ie_key=None, extra_info={},
1148                      process=True, force_generic_extractor=False):
1149         """
1150         Return a list with a dictionary for each video extracted.
1151
1152         Arguments:
1153         url -- URL to extract
1154
1155         Keyword arguments:
1156         download -- whether to download videos during extraction
1157         ie_key -- extractor key hint
1158         extra_info -- dictionary containing the extra values to add to each result
1159         process -- whether to resolve all unresolved references (URLs, playlist items),
1160             must be True for download to work.
1161         force_generic_extractor -- force using the generic extractor
1162         """
1163
1164         if not ie_key and force_generic_extractor:
1165             ie_key = 'Generic'
1166
1167         if ie_key:
1168             ies = [self.get_info_extractor(ie_key)]
1169         else:
1170             ies = self._ies
1171
1172         for ie in ies:
1173             if not ie.suitable(url):
1174                 continue
1175
1176             ie_key = ie.ie_key()
1177             ie = self.get_info_extractor(ie_key)
1178             if not ie.working():
1179                 self.report_warning('The program functionality for this site has been marked as broken, '
1180                                     'and will probably not work.')
1181
1182             try:
1183                 temp_id = str_or_none(
1184                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1185                     else ie._match_id(url))
1186             except (AssertionError, IndexError, AttributeError):
1187                 temp_id = None
1188             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1189                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1190                                ie_key, temp_id))
1191                 break
1192             return self.__extract_info(url, ie, download, extra_info, process)
1193         else:
1194             self.report_error('no suitable InfoExtractor for URL %s' % url)
1195
1196     def __handle_extraction_exceptions(func, handle_all_errors=True):
1197         def wrapper(self, *args, **kwargs):
1198             try:
1199                 return func(self, *args, **kwargs)
1200             except GeoRestrictedError as e:
1201                 msg = e.msg
1202                 if e.countries:
1203                     msg += '\nThis video is available in %s.' % ', '.join(
1204                         map(ISO3166Utils.short2full, e.countries))
1205                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1206                 self.report_error(msg)
1207             except ExtractorError as e:  # An error we somewhat expected
1208                 self.report_error(compat_str(e), e.format_traceback())
1209             except ThrottledDownload:
1210                 self.to_stderr('\r')
1211                 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1212                 return wrapper(self, *args, **kwargs)
1213             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1214                 raise
1215             except Exception as e:
1216                 if handle_all_errors and self.params.get('ignoreerrors', False):
1217                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1218                 else:
1219                     raise
1220         return wrapper
1221
1222     @__handle_extraction_exceptions
1223     def __extract_info(self, url, ie, download, extra_info, process):
1224         ie_result = ie.extract(url)
1225         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1226             return
1227         if isinstance(ie_result, list):
1228             # Backwards compatibility: old IE result format
1229             ie_result = {
1230                 '_type': 'compat_list',
1231                 'entries': ie_result,
1232             }
1233         if extra_info.get('original_url'):
1234             ie_result.setdefault('original_url', extra_info['original_url'])
1235         self.add_default_extra_info(ie_result, ie, url)
1236         if process:
1237             return self.process_ie_result(ie_result, download, extra_info)
1238         else:
1239             return ie_result
1240
1241     def add_default_extra_info(self, ie_result, ie, url):
1242         if url is not None:
1243             self.add_extra_info(ie_result, {
1244                 'webpage_url': url,
1245                 'original_url': url,
1246                 'webpage_url_basename': url_basename(url),
1247             })
1248         if ie is not None:
1249             self.add_extra_info(ie_result, {
1250                 'extractor': ie.IE_NAME,
1251                 'extractor_key': ie.ie_key(),
1252             })
1253
1254     def process_ie_result(self, ie_result, download=True, extra_info={}):
1255         """
1256         Take the result of the ie(may be modified) and resolve all unresolved
1257         references (URLs, playlist items).
1258
1259         It will also download the videos if 'download'.
1260         Returns the resolved ie_result.
1261         """
1262         result_type = ie_result.get('_type', 'video')
1263
1264         if result_type in ('url', 'url_transparent'):
1265             ie_result['url'] = sanitize_url(ie_result['url'])
1266             if ie_result.get('original_url'):
1267                 extra_info.setdefault('original_url', ie_result['original_url'])
1268
1269             extract_flat = self.params.get('extract_flat', False)
1270             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1271                     or extract_flat is True):
1272                 info_copy = ie_result.copy()
1273                 self.add_extra_info(info_copy, extra_info)
1274                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1275                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1276                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1277                 return ie_result
1278
1279         if result_type == 'video':
1280             self.add_extra_info(ie_result, extra_info)
1281             ie_result = self.process_video_result(ie_result, download=download)
1282             additional_urls = (ie_result or {}).get('additional_urls')
1283             if additional_urls:
1284                 # TODO: Improve MetadataFromFieldPP to allow setting a list
1285                 if isinstance(additional_urls, compat_str):
1286                     additional_urls = [additional_urls]
1287                 self.to_screen(
1288                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1289                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1290                 ie_result['additional_entries'] = [
1291                     self.extract_info(
1292                         url, download, extra_info,
1293                         force_generic_extractor=self.params.get('force_generic_extractor'))
1294                     for url in additional_urls
1295                 ]
1296             return ie_result
1297         elif result_type == 'url':
1298             # We have to add extra_info to the results because it may be
1299             # contained in a playlist
1300             return self.extract_info(
1301                 ie_result['url'], download,
1302                 ie_key=ie_result.get('ie_key'),
1303                 extra_info=extra_info)
1304         elif result_type == 'url_transparent':
1305             # Use the information from the embedding page
1306             info = self.extract_info(
1307                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1308                 extra_info=extra_info, download=False, process=False)
1309
1310             # extract_info may return None when ignoreerrors is enabled and
1311             # extraction failed with an error, don't crash and return early
1312             # in this case
1313             if not info:
1314                 return info
1315
1316             force_properties = dict(
1317                 (k, v) for k, v in ie_result.items() if v is not None)
1318             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1319                 if f in force_properties:
1320                     del force_properties[f]
1321             new_result = info.copy()
1322             new_result.update(force_properties)
1323
1324             # Extracted info may not be a video result (i.e.
1325             # info.get('_type', 'video') != video) but rather an url or
1326             # url_transparent. In such cases outer metadata (from ie_result)
1327             # should be propagated to inner one (info). For this to happen
1328             # _type of info should be overridden with url_transparent. This
1329             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1330             if new_result.get('_type') == 'url':
1331                 new_result['_type'] = 'url_transparent'
1332
1333             return self.process_ie_result(
1334                 new_result, download=download, extra_info=extra_info)
1335         elif result_type in ('playlist', 'multi_video'):
1336             # Protect from infinite recursion due to recursively nested playlists
1337             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1338             webpage_url = ie_result['webpage_url']
1339             if webpage_url in self._playlist_urls:
1340                 self.to_screen(
1341                     '[download] Skipping already downloaded playlist: %s'
1342                     % ie_result.get('title') or ie_result.get('id'))
1343                 return
1344
1345             self._playlist_level += 1
1346             self._playlist_urls.add(webpage_url)
1347             self._sanitize_thumbnails(ie_result)
1348             try:
1349                 return self.__process_playlist(ie_result, download)
1350             finally:
1351                 self._playlist_level -= 1
1352                 if not self._playlist_level:
1353                     self._playlist_urls.clear()
1354         elif result_type == 'compat_list':
1355             self.report_warning(
1356                 'Extractor %s returned a compat_list result. '
1357                 'It needs to be updated.' % ie_result.get('extractor'))
1358
1359             def _fixup(r):
1360                 self.add_extra_info(r, {
1361                     'extractor': ie_result['extractor'],
1362                     'webpage_url': ie_result['webpage_url'],
1363                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1364                     'extractor_key': ie_result['extractor_key'],
1365                 })
1366                 return r
1367             ie_result['entries'] = [
1368                 self.process_ie_result(_fixup(r), download, extra_info)
1369                 for r in ie_result['entries']
1370             ]
1371             return ie_result
1372         else:
1373             raise Exception('Invalid result type: %s' % result_type)
1374
1375     def _ensure_dir_exists(self, path):
1376         return make_dir(path, self.report_error)
1377
1378     def __process_playlist(self, ie_result, download):
1379         # We process each entry in the playlist
1380         playlist = ie_result.get('title') or ie_result.get('id')
1381         self.to_screen('[download] Downloading playlist: %s' % playlist)
1382
1383         if 'entries' not in ie_result:
1384             raise EntryNotInPlaylist()
1385         incomplete_entries = bool(ie_result.get('requested_entries'))
1386         if incomplete_entries:
1387             def fill_missing_entries(entries, indexes):
1388                 ret = [None] * max(*indexes)
1389                 for i, entry in zip(indexes, entries):
1390                     ret[i - 1] = entry
1391                 return ret
1392             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1393
1394         playlist_results = []
1395
1396         playliststart = self.params.get('playliststart', 1)
1397         playlistend = self.params.get('playlistend')
1398         # For backwards compatibility, interpret -1 as whole list
1399         if playlistend == -1:
1400             playlistend = None
1401
1402         playlistitems_str = self.params.get('playlist_items')
1403         playlistitems = None
1404         if playlistitems_str is not None:
1405             def iter_playlistitems(format):
1406                 for string_segment in format.split(','):
1407                     if '-' in string_segment:
1408                         start, end = string_segment.split('-')
1409                         for item in range(int(start), int(end) + 1):
1410                             yield int(item)
1411                     else:
1412                         yield int(string_segment)
1413             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1414
1415         ie_entries = ie_result['entries']
1416         msg = (
1417             'Downloading %d videos' if not isinstance(ie_entries, list)
1418             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1419         if not isinstance(ie_entries, (list, PagedList)):
1420             ie_entries = LazyList(ie_entries)
1421
1422         def get_entry(i):
1423             return YoutubeDL.__handle_extraction_exceptions(
1424                 lambda self, i: ie_entries[i - 1],
1425                 False
1426             )(self, i)
1427
1428         entries = []
1429         for i in playlistitems or itertools.count(playliststart):
1430             if playlistitems is None and playlistend is not None and playlistend < i:
1431                 break
1432             entry = None
1433             try:
1434                 entry = get_entry(i)
1435                 if entry is None:
1436                     raise EntryNotInPlaylist()
1437             except (IndexError, EntryNotInPlaylist):
1438                 if incomplete_entries:
1439                     raise EntryNotInPlaylist()
1440                 elif not playlistitems:
1441                     break
1442             entries.append(entry)
1443             try:
1444                 if entry is not None:
1445                     self._match_entry(entry, incomplete=True, silent=True)
1446             except (ExistingVideoReached, RejectedVideoReached):
1447                 break
1448         ie_result['entries'] = entries
1449
1450         # Save playlist_index before re-ordering
1451         entries = [
1452             ((playlistitems[i - 1] if playlistitems else i), entry)
1453             for i, entry in enumerate(entries, 1)
1454             if entry is not None]
1455         n_entries = len(entries)
1456
1457         if not playlistitems and (playliststart or playlistend):
1458             playlistitems = list(range(playliststart, playliststart + n_entries))
1459         ie_result['requested_entries'] = playlistitems
1460
1461         if self.params.get('allow_playlist_files', True):
1462             ie_copy = {
1463                 'playlist': playlist,
1464                 'playlist_id': ie_result.get('id'),
1465                 'playlist_title': ie_result.get('title'),
1466                 'playlist_uploader': ie_result.get('uploader'),
1467                 'playlist_uploader_id': ie_result.get('uploader_id'),
1468                 'playlist_index': 0,
1469             }
1470             ie_copy.update(dict(ie_result))
1471
1472             if self.params.get('writeinfojson', False):
1473                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1474                 if not self._ensure_dir_exists(encodeFilename(infofn)):
1475                     return
1476                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1477                     self.to_screen('[info] Playlist metadata is already present')
1478                 else:
1479                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1480                     try:
1481                         write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1482                     except (OSError, IOError):
1483                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1484
1485             # TODO: This should be passed to ThumbnailsConvertor if necessary
1486             self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1487
1488             if self.params.get('writedescription', False):
1489                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1490                 if not self._ensure_dir_exists(encodeFilename(descfn)):
1491                     return
1492                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1493                     self.to_screen('[info] Playlist description is already present')
1494                 elif ie_result.get('description') is None:
1495                     self.report_warning('There\'s no playlist description to write.')
1496                 else:
1497                     try:
1498                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1499                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1500                             descfile.write(ie_result['description'])
1501                     except (OSError, IOError):
1502                         self.report_error('Cannot write playlist description file ' + descfn)
1503                         return
1504
1505         if self.params.get('playlistreverse', False):
1506             entries = entries[::-1]
1507         if self.params.get('playlistrandom', False):
1508             random.shuffle(entries)
1509
1510         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1511
1512         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1513         failures = 0
1514         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1515         for i, entry_tuple in enumerate(entries, 1):
1516             playlist_index, entry = entry_tuple
1517             if 'playlist_index' in self.params.get('compat_options', []):
1518                 playlist_index = playlistitems[i - 1] if playlistitems else i
1519             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1520             # This __x_forwarded_for_ip thing is a bit ugly but requires
1521             # minimal changes
1522             if x_forwarded_for:
1523                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1524             extra = {
1525                 'n_entries': n_entries,
1526                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1527                 'playlist_index': playlist_index,
1528                 'playlist_autonumber': i,
1529                 'playlist': playlist,
1530                 'playlist_id': ie_result.get('id'),
1531                 'playlist_title': ie_result.get('title'),
1532                 'playlist_uploader': ie_result.get('uploader'),
1533                 'playlist_uploader_id': ie_result.get('uploader_id'),
1534                 'extractor': ie_result['extractor'],
1535                 'webpage_url': ie_result['webpage_url'],
1536                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1537                 'extractor_key': ie_result['extractor_key'],
1538             }
1539
1540             if self._match_entry(entry, incomplete=True) is not None:
1541                 continue
1542
1543             entry_result = self.__process_iterable_entry(entry, download, extra)
1544             if not entry_result:
1545                 failures += 1
1546             if failures >= max_failures:
1547                 self.report_error(
1548                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1549                 break
1550             # TODO: skip failed (empty) entries?
1551             playlist_results.append(entry_result)
1552         ie_result['entries'] = playlist_results
1553         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1554         return ie_result
1555
1556     @__handle_extraction_exceptions
1557     def __process_iterable_entry(self, entry, download, extra_info):
1558         return self.process_ie_result(
1559             entry, download=download, extra_info=extra_info)
1560
1561     def _build_format_filter(self, filter_spec):
1562         " Returns a function to filter the formats according to the filter_spec "
1563
1564         OPERATORS = {
1565             '<': operator.lt,
1566             '<=': operator.le,
1567             '>': operator.gt,
1568             '>=': operator.ge,
1569             '=': operator.eq,
1570             '!=': operator.ne,
1571         }
1572         operator_rex = re.compile(r'''(?x)\s*
1573             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1574             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1575             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1576             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1577         m = operator_rex.fullmatch(filter_spec)
1578         if m:
1579             try:
1580                 comparison_value = int(m.group('value'))
1581             except ValueError:
1582                 comparison_value = parse_filesize(m.group('value'))
1583                 if comparison_value is None:
1584                     comparison_value = parse_filesize(m.group('value') + 'B')
1585                 if comparison_value is None:
1586                     raise ValueError(
1587                         'Invalid value %r in format specification %r' % (
1588                             m.group('value'), filter_spec))
1589             op = OPERATORS[m.group('op')]
1590
1591         if not m:
1592             STR_OPERATORS = {
1593                 '=': operator.eq,
1594                 '^=': lambda attr, value: attr.startswith(value),
1595                 '$=': lambda attr, value: attr.endswith(value),
1596                 '*=': lambda attr, value: value in attr,
1597             }
1598             str_operator_rex = re.compile(r'''(?x)\s*
1599                 (?P<key>[a-zA-Z0-9._-]+)\s*
1600                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1601                 (?P<value>[a-zA-Z0-9._-]+)\s*
1602                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1603             m = str_operator_rex.fullmatch(filter_spec)
1604             if m:
1605                 comparison_value = m.group('value')
1606                 str_op = STR_OPERATORS[m.group('op')]
1607                 if m.group('negation'):
1608                     op = lambda attr, value: not str_op(attr, value)
1609                 else:
1610                     op = str_op
1611
1612         if not m:
1613             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1614
1615         def _filter(f):
1616             actual_value = f.get(m.group('key'))
1617             if actual_value is None:
1618                 return m.group('none_inclusive')
1619             return op(actual_value, comparison_value)
1620         return _filter
1621
1622     def _default_format_spec(self, info_dict, download=True):
1623
1624         def can_merge():
1625             merger = FFmpegMergerPP(self)
1626             return merger.available and merger.can_merge()
1627
1628         prefer_best = (
1629             not self.params.get('simulate')
1630             and download
1631             and (
1632                 not can_merge()
1633                 or info_dict.get('is_live', False)
1634                 or self.outtmpl_dict['default'] == '-'))
1635         compat = (
1636             prefer_best
1637             or self.params.get('allow_multiple_audio_streams', False)
1638             or 'format-spec' in self.params.get('compat_opts', []))
1639
1640         return (
1641             'best/bestvideo+bestaudio' if prefer_best
1642             else 'bestvideo*+bestaudio/best' if not compat
1643             else 'bestvideo+bestaudio/best')
1644
1645     def build_format_selector(self, format_spec):
1646         def syntax_error(note, start):
1647             message = (
1648                 'Invalid format specification: '
1649                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1650             return SyntaxError(message)
1651
1652         PICKFIRST = 'PICKFIRST'
1653         MERGE = 'MERGE'
1654         SINGLE = 'SINGLE'
1655         GROUP = 'GROUP'
1656         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1657
1658         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1659                                   'video': self.params.get('allow_multiple_video_streams', False)}
1660
1661         check_formats = self.params.get('check_formats')
1662
1663         def _parse_filter(tokens):
1664             filter_parts = []
1665             for type, string, start, _, _ in tokens:
1666                 if type == tokenize.OP and string == ']':
1667                     return ''.join(filter_parts)
1668                 else:
1669                     filter_parts.append(string)
1670
1671         def _remove_unused_ops(tokens):
1672             # Remove operators that we don't use and join them with the surrounding strings
1673             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1674             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1675             last_string, last_start, last_end, last_line = None, None, None, None
1676             for type, string, start, end, line in tokens:
1677                 if type == tokenize.OP and string == '[':
1678                     if last_string:
1679                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1680                         last_string = None
1681                     yield type, string, start, end, line
1682                     # everything inside brackets will be handled by _parse_filter
1683                     for type, string, start, end, line in tokens:
1684                         yield type, string, start, end, line
1685                         if type == tokenize.OP and string == ']':
1686                             break
1687                 elif type == tokenize.OP and string in ALLOWED_OPS:
1688                     if last_string:
1689                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1690                         last_string = None
1691                     yield type, string, start, end, line
1692                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1693                     if not last_string:
1694                         last_string = string
1695                         last_start = start
1696                         last_end = end
1697                     else:
1698                         last_string += string
1699             if last_string:
1700                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1701
1702         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1703             selectors = []
1704             current_selector = None
1705             for type, string, start, _, _ in tokens:
1706                 # ENCODING is only defined in python 3.x
1707                 if type == getattr(tokenize, 'ENCODING', None):
1708                     continue
1709                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1710                     current_selector = FormatSelector(SINGLE, string, [])
1711                 elif type == tokenize.OP:
1712                     if string == ')':
1713                         if not inside_group:
1714                             # ')' will be handled by the parentheses group
1715                             tokens.restore_last_token()
1716                         break
1717                     elif inside_merge and string in ['/', ',']:
1718                         tokens.restore_last_token()
1719                         break
1720                     elif inside_choice and string == ',':
1721                         tokens.restore_last_token()
1722                         break
1723                     elif string == ',':
1724                         if not current_selector:
1725                             raise syntax_error('"," must follow a format selector', start)
1726                         selectors.append(current_selector)
1727                         current_selector = None
1728                     elif string == '/':
1729                         if not current_selector:
1730                             raise syntax_error('"/" must follow a format selector', start)
1731                         first_choice = current_selector
1732                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1733                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1734                     elif string == '[':
1735                         if not current_selector:
1736                             current_selector = FormatSelector(SINGLE, 'best', [])
1737                         format_filter = _parse_filter(tokens)
1738                         current_selector.filters.append(format_filter)
1739                     elif string == '(':
1740                         if current_selector:
1741                             raise syntax_error('Unexpected "("', start)
1742                         group = _parse_format_selection(tokens, inside_group=True)
1743                         current_selector = FormatSelector(GROUP, group, [])
1744                     elif string == '+':
1745                         if not current_selector:
1746                             raise syntax_error('Unexpected "+"', start)
1747                         selector_1 = current_selector
1748                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1749                         if not selector_2:
1750                             raise syntax_error('Expected a selector', start)
1751                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1752                     else:
1753                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1754                 elif type == tokenize.ENDMARKER:
1755                     break
1756             if current_selector:
1757                 selectors.append(current_selector)
1758             return selectors
1759
1760         def _merge(formats_pair):
1761             format_1, format_2 = formats_pair
1762
1763             formats_info = []
1764             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1765             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1766
1767             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1768                 get_no_more = {'video': False, 'audio': False}
1769                 for (i, fmt_info) in enumerate(formats_info):
1770                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1771                         formats_info.pop(i)
1772                         continue
1773                     for aud_vid in ['audio', 'video']:
1774                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1775                             if get_no_more[aud_vid]:
1776                                 formats_info.pop(i)
1777                                 break
1778                             get_no_more[aud_vid] = True
1779
1780             if len(formats_info) == 1:
1781                 return formats_info[0]
1782
1783             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1784             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1785
1786             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1787             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1788
1789             output_ext = self.params.get('merge_output_format')
1790             if not output_ext:
1791                 if the_only_video:
1792                     output_ext = the_only_video['ext']
1793                 elif the_only_audio and not video_fmts:
1794                     output_ext = the_only_audio['ext']
1795                 else:
1796                     output_ext = 'mkv'
1797
1798             new_dict = {
1799                 'requested_formats': formats_info,
1800                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1801                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1802                 'ext': output_ext,
1803             }
1804
1805             if the_only_video:
1806                 new_dict.update({
1807                     'width': the_only_video.get('width'),
1808                     'height': the_only_video.get('height'),
1809                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1810                     'fps': the_only_video.get('fps'),
1811                     'vcodec': the_only_video.get('vcodec'),
1812                     'vbr': the_only_video.get('vbr'),
1813                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1814                 })
1815
1816             if the_only_audio:
1817                 new_dict.update({
1818                     'acodec': the_only_audio.get('acodec'),
1819                     'abr': the_only_audio.get('abr'),
1820                 })
1821
1822             return new_dict
1823
1824         def _check_formats(formats):
1825             if not check_formats:
1826                 yield from formats
1827                 return
1828             for f in formats:
1829                 self.to_screen('[info] Testing format %s' % f['format_id'])
1830                 temp_file = tempfile.NamedTemporaryFile(
1831                     suffix='.tmp', delete=False,
1832                     dir=self.get_output_path('temp') or None)
1833                 temp_file.close()
1834                 try:
1835                     success, _ = self.dl(temp_file.name, f, test=True)
1836                 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1837                     success = False
1838                 finally:
1839                     if os.path.exists(temp_file.name):
1840                         try:
1841                             os.remove(temp_file.name)
1842                         except OSError:
1843                             self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1844                 if success:
1845                     yield f
1846                 else:
1847                     self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1848
1849         def _build_selector_function(selector):
1850             if isinstance(selector, list):  # ,
1851                 fs = [_build_selector_function(s) for s in selector]
1852
1853                 def selector_function(ctx):
1854                     for f in fs:
1855                         yield from f(ctx)
1856                 return selector_function
1857
1858             elif selector.type == GROUP:  # ()
1859                 selector_function = _build_selector_function(selector.selector)
1860
1861             elif selector.type == PICKFIRST:  # /
1862                 fs = [_build_selector_function(s) for s in selector.selector]
1863
1864                 def selector_function(ctx):
1865                     for f in fs:
1866                         picked_formats = list(f(ctx))
1867                         if picked_formats:
1868                             return picked_formats
1869                     return []
1870
1871             elif selector.type == MERGE:  # +
1872                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1873
1874                 def selector_function(ctx):
1875                     for pair in itertools.product(
1876                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1877                         yield _merge(pair)
1878
1879             elif selector.type == SINGLE:  # atom
1880                 format_spec = selector.selector or 'best'
1881
1882                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1883                 if format_spec == 'all':
1884                     def selector_function(ctx):
1885                         yield from _check_formats(ctx['formats'])
1886                 elif format_spec == 'mergeall':
1887                     def selector_function(ctx):
1888                         formats = list(_check_formats(ctx['formats']))
1889                         if not formats:
1890                             return
1891                         merged_format = formats[-1]
1892                         for f in formats[-2::-1]:
1893                             merged_format = _merge((merged_format, f))
1894                         yield merged_format
1895
1896                 else:
1897                     format_fallback, format_reverse, format_idx = False, True, 1
1898                     mobj = re.match(
1899                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1900                         format_spec)
1901                     if mobj is not None:
1902                         format_idx = int_or_none(mobj.group('n'), default=1)
1903                         format_reverse = mobj.group('bw')[0] == 'b'
1904                         format_type = (mobj.group('type') or [None])[0]
1905                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1906                         format_modified = mobj.group('mod') is not None
1907
1908                         format_fallback = not format_type and not format_modified  # for b, w
1909                         _filter_f = (
1910                             (lambda f: f.get('%scodec' % format_type) != 'none')
1911                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1912                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1913                             if format_type  # bv, ba, wv, wa
1914                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1915                             if not format_modified  # b, w
1916                             else lambda f: True)  # b*, w*
1917                         filter_f = lambda f: _filter_f(f) and (
1918                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
1919                     else:
1920                         filter_f = ((lambda f: f.get('ext') == format_spec)
1921                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1922                                     else (lambda f: f.get('format_id') == format_spec))  # id
1923
1924                     def selector_function(ctx):
1925                         formats = list(ctx['formats'])
1926                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1927                         if format_fallback and ctx['incomplete_formats'] and not matches:
1928                             # for extractors with incomplete formats (audio only (soundcloud)
1929                             # or video only (imgur)) best/worst will fallback to
1930                             # best/worst {video,audio}-only format
1931                             matches = formats
1932                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
1933                         try:
1934                             yield matches[format_idx - 1]
1935                         except IndexError:
1936                             return
1937
1938             filters = [self._build_format_filter(f) for f in selector.filters]
1939
1940             def final_selector(ctx):
1941                 ctx_copy = copy.deepcopy(ctx)
1942                 for _filter in filters:
1943                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1944                 return selector_function(ctx_copy)
1945             return final_selector
1946
1947         stream = io.BytesIO(format_spec.encode('utf-8'))
1948         try:
1949             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1950         except tokenize.TokenError:
1951             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1952
1953         class TokenIterator(object):
1954             def __init__(self, tokens):
1955                 self.tokens = tokens
1956                 self.counter = 0
1957
1958             def __iter__(self):
1959                 return self
1960
1961             def __next__(self):
1962                 if self.counter >= len(self.tokens):
1963                     raise StopIteration()
1964                 value = self.tokens[self.counter]
1965                 self.counter += 1
1966                 return value
1967
1968             next = __next__
1969
1970             def restore_last_token(self):
1971                 self.counter -= 1
1972
1973         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1974         return _build_selector_function(parsed_selector)
1975
1976     def _calc_headers(self, info_dict):
1977         res = std_headers.copy()
1978
1979         add_headers = info_dict.get('http_headers')
1980         if add_headers:
1981             res.update(add_headers)
1982
1983         cookies = self._calc_cookies(info_dict)
1984         if cookies:
1985             res['Cookie'] = cookies
1986
1987         if 'X-Forwarded-For' not in res:
1988             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1989             if x_forwarded_for_ip:
1990                 res['X-Forwarded-For'] = x_forwarded_for_ip
1991
1992         return res
1993
1994     def _calc_cookies(self, info_dict):
1995         pr = sanitized_Request(info_dict['url'])
1996         self.cookiejar.add_cookie_header(pr)
1997         return pr.get_header('Cookie')
1998
1999     def _sanitize_thumbnails(self, info_dict):
2000         thumbnails = info_dict.get('thumbnails')
2001         if thumbnails is None:
2002             thumbnail = info_dict.get('thumbnail')
2003             if thumbnail:
2004                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2005         if thumbnails:
2006             thumbnails.sort(key=lambda t: (
2007                 t.get('preference') if t.get('preference') is not None else -1,
2008                 t.get('width') if t.get('width') is not None else -1,
2009                 t.get('height') if t.get('height') is not None else -1,
2010                 t.get('id') if t.get('id') is not None else '',
2011                 t.get('url')))
2012
2013             def thumbnail_tester():
2014                 if self.params.get('check_formats'):
2015                     test_all = True
2016                     to_screen = lambda msg: self.to_screen(f'[info] {msg}')
2017                 else:
2018                     test_all = False
2019                     to_screen = self.write_debug
2020
2021                 def test_thumbnail(t):
2022                     if not test_all and not t.get('_test_url'):
2023                         return True
2024                     to_screen('Testing thumbnail %s' % t['id'])
2025                     try:
2026                         self.urlopen(HEADRequest(t['url']))
2027                     except network_exceptions as err:
2028                         to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2029                             t['id'], t['url'], error_to_compat_str(err)))
2030                         return False
2031                     return True
2032
2033                 return test_thumbnail
2034
2035             for i, t in enumerate(thumbnails):
2036                 if t.get('id') is None:
2037                     t['id'] = '%d' % i
2038                 if t.get('width') and t.get('height'):
2039                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
2040                 t['url'] = sanitize_url(t['url'])
2041
2042             if self.params.get('check_formats') is not False:
2043                 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2044             else:
2045                 info_dict['thumbnails'] = thumbnails
2046
2047     def process_video_result(self, info_dict, download=True):
2048         assert info_dict.get('_type', 'video') == 'video'
2049
2050         if 'id' not in info_dict:
2051             raise ExtractorError('Missing "id" field in extractor result')
2052         if 'title' not in info_dict:
2053             raise ExtractorError('Missing "title" field in extractor result')
2054
2055         def report_force_conversion(field, field_not, conversion):
2056             self.report_warning(
2057                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2058                 % (field, field_not, conversion))
2059
2060         def sanitize_string_field(info, string_field):
2061             field = info.get(string_field)
2062             if field is None or isinstance(field, compat_str):
2063                 return
2064             report_force_conversion(string_field, 'a string', 'string')
2065             info[string_field] = compat_str(field)
2066
2067         def sanitize_numeric_fields(info):
2068             for numeric_field in self._NUMERIC_FIELDS:
2069                 field = info.get(numeric_field)
2070                 if field is None or isinstance(field, compat_numeric_types):
2071                     continue
2072                 report_force_conversion(numeric_field, 'numeric', 'int')
2073                 info[numeric_field] = int_or_none(field)
2074
2075         sanitize_string_field(info_dict, 'id')
2076         sanitize_numeric_fields(info_dict)
2077
2078         if 'playlist' not in info_dict:
2079             # It isn't part of a playlist
2080             info_dict['playlist'] = None
2081             info_dict['playlist_index'] = None
2082
2083         self._sanitize_thumbnails(info_dict)
2084
2085         thumbnail = info_dict.get('thumbnail')
2086         thumbnails = info_dict.get('thumbnails')
2087         if thumbnail:
2088             info_dict['thumbnail'] = sanitize_url(thumbnail)
2089         elif thumbnails:
2090             info_dict['thumbnail'] = thumbnails[-1]['url']
2091
2092         if info_dict.get('display_id') is None and 'id' in info_dict:
2093             info_dict['display_id'] = info_dict['id']
2094
2095         for ts_key, date_key in (
2096                 ('timestamp', 'upload_date'),
2097                 ('release_timestamp', 'release_date'),
2098         ):
2099             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2100                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2101                 # see http://bugs.python.org/issue1646728)
2102                 try:
2103                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2104                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2105                 except (ValueError, OverflowError, OSError):
2106                     pass
2107
2108         live_keys = ('is_live', 'was_live')
2109         live_status = info_dict.get('live_status')
2110         if live_status is None:
2111             for key in live_keys:
2112                 if info_dict.get(key) is False:
2113                     continue
2114                 if info_dict.get(key):
2115                     live_status = key
2116                 break
2117             if all(info_dict.get(key) is False for key in live_keys):
2118                 live_status = 'not_live'
2119         if live_status:
2120             info_dict['live_status'] = live_status
2121             for key in live_keys:
2122                 if info_dict.get(key) is None:
2123                     info_dict[key] = (live_status == key)
2124
2125         # Auto generate title fields corresponding to the *_number fields when missing
2126         # in order to always have clean titles. This is very common for TV series.
2127         for field in ('chapter', 'season', 'episode'):
2128             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2129                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2130
2131         for cc_kind in ('subtitles', 'automatic_captions'):
2132             cc = info_dict.get(cc_kind)
2133             if cc:
2134                 for _, subtitle in cc.items():
2135                     for subtitle_format in subtitle:
2136                         if subtitle_format.get('url'):
2137                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2138                         if subtitle_format.get('ext') is None:
2139                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2140
2141         automatic_captions = info_dict.get('automatic_captions')
2142         subtitles = info_dict.get('subtitles')
2143
2144         info_dict['requested_subtitles'] = self.process_subtitles(
2145             info_dict['id'], subtitles, automatic_captions)
2146
2147         # We now pick which formats have to be downloaded
2148         if info_dict.get('formats') is None:
2149             # There's only one format available
2150             formats = [info_dict]
2151         else:
2152             formats = info_dict['formats']
2153
2154         if not formats:
2155             if not self.params.get('ignore_no_formats_error'):
2156                 raise ExtractorError('No video formats found!')
2157             else:
2158                 self.report_warning('No video formats found!')
2159
2160         def is_wellformed(f):
2161             url = f.get('url')
2162             if not url:
2163                 self.report_warning(
2164                     '"url" field is missing or empty - skipping format, '
2165                     'there is an error in extractor')
2166                 return False
2167             if isinstance(url, bytes):
2168                 sanitize_string_field(f, 'url')
2169             return True
2170
2171         # Filter out malformed formats for better extraction robustness
2172         formats = list(filter(is_wellformed, formats))
2173
2174         formats_dict = {}
2175
2176         # We check that all the formats have the format and format_id fields
2177         for i, format in enumerate(formats):
2178             sanitize_string_field(format, 'format_id')
2179             sanitize_numeric_fields(format)
2180             format['url'] = sanitize_url(format['url'])
2181             if not format.get('format_id'):
2182                 format['format_id'] = compat_str(i)
2183             else:
2184                 # Sanitize format_id from characters used in format selector expression
2185                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2186             format_id = format['format_id']
2187             if format_id not in formats_dict:
2188                 formats_dict[format_id] = []
2189             formats_dict[format_id].append(format)
2190
2191         # Make sure all formats have unique format_id
2192         for format_id, ambiguous_formats in formats_dict.items():
2193             if len(ambiguous_formats) > 1:
2194                 for i, format in enumerate(ambiguous_formats):
2195                     format['format_id'] = '%s-%d' % (format_id, i)
2196
2197         for i, format in enumerate(formats):
2198             if format.get('format') is None:
2199                 format['format'] = '{id} - {res}{note}'.format(
2200                     id=format['format_id'],
2201                     res=self.format_resolution(format),
2202                     note=format_field(format, 'format_note', ' (%s)'),
2203                 )
2204             # Automatically determine file extension if missing
2205             if format.get('ext') is None:
2206                 format['ext'] = determine_ext(format['url']).lower()
2207             # Automatically determine protocol if missing (useful for format
2208             # selection purposes)
2209             if format.get('protocol') is None:
2210                 format['protocol'] = determine_protocol(format)
2211             # Add HTTP headers, so that external programs can use them from the
2212             # json output
2213             full_format_info = info_dict.copy()
2214             full_format_info.update(format)
2215             format['http_headers'] = self._calc_headers(full_format_info)
2216         # Remove private housekeeping stuff
2217         if '__x_forwarded_for_ip' in info_dict:
2218             del info_dict['__x_forwarded_for_ip']
2219
2220         # TODO Central sorting goes here
2221
2222         if formats and formats[0] is not info_dict:
2223             # only set the 'formats' fields if the original info_dict list them
2224             # otherwise we end up with a circular reference, the first (and unique)
2225             # element in the 'formats' field in info_dict is info_dict itself,
2226             # which can't be exported to json
2227             info_dict['formats'] = formats
2228
2229         info_dict, _ = self.pre_process(info_dict)
2230
2231         if self.params.get('list_thumbnails'):
2232             self.list_thumbnails(info_dict)
2233         if self.params.get('listformats'):
2234             if not info_dict.get('formats'):
2235                 raise ExtractorError('No video formats found', expected=True)
2236             self.list_formats(info_dict)
2237         if self.params.get('listsubtitles'):
2238             if 'automatic_captions' in info_dict:
2239                 self.list_subtitles(
2240                     info_dict['id'], automatic_captions, 'automatic captions')
2241             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2242         list_only = self.params.get('simulate') is None and (
2243             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2244         if list_only:
2245             # Without this printing, -F --print-json will not work
2246             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2247             return
2248
2249         format_selector = self.format_selector
2250         if format_selector is None:
2251             req_format = self._default_format_spec(info_dict, download=download)
2252             self.write_debug('Default format spec: %s' % req_format)
2253             format_selector = self.build_format_selector(req_format)
2254
2255         # While in format selection we may need to have an access to the original
2256         # format set in order to calculate some metrics or do some processing.
2257         # For now we need to be able to guess whether original formats provided
2258         # by extractor are incomplete or not (i.e. whether extractor provides only
2259         # video-only or audio-only formats) for proper formats selection for
2260         # extractors with such incomplete formats (see
2261         # https://github.com/ytdl-org/youtube-dl/pull/5556).
2262         # Since formats may be filtered during format selection and may not match
2263         # the original formats the results may be incorrect. Thus original formats
2264         # or pre-calculated metrics should be passed to format selection routines
2265         # as well.
2266         # We will pass a context object containing all necessary additional data
2267         # instead of just formats.
2268         # This fixes incorrect format selection issue (see
2269         # https://github.com/ytdl-org/youtube-dl/issues/10083).
2270         incomplete_formats = (
2271             # All formats are video-only or
2272             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2273             # all formats are audio-only
2274             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2275
2276         ctx = {
2277             'formats': formats,
2278             'incomplete_formats': incomplete_formats,
2279         }
2280
2281         formats_to_download = list(format_selector(ctx))
2282         if not formats_to_download:
2283             if not self.params.get('ignore_no_formats_error'):
2284                 raise ExtractorError('Requested format is not available', expected=True)
2285             else:
2286                 self.report_warning('Requested format is not available')
2287                 # Process what we can, even without any available formats.
2288                 self.process_info(dict(info_dict))
2289         elif download:
2290             self.to_screen(
2291                 '[info] %s: Downloading %d format(s): %s' % (
2292                     info_dict['id'], len(formats_to_download),
2293                     ", ".join([f['format_id'] for f in formats_to_download])))
2294             for fmt in formats_to_download:
2295                 new_info = dict(info_dict)
2296                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2297                 new_info['__original_infodict'] = info_dict
2298                 new_info.update(fmt)
2299                 self.process_info(new_info)
2300         # We update the info dict with the best quality format (backwards compatibility)
2301         if formats_to_download:
2302             info_dict.update(formats_to_download[-1])
2303         return info_dict
2304
2305     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2306         """Select the requested subtitles and their format"""
2307         available_subs = {}
2308         if normal_subtitles and self.params.get('writesubtitles'):
2309             available_subs.update(normal_subtitles)
2310         if automatic_captions and self.params.get('writeautomaticsub'):
2311             for lang, cap_info in automatic_captions.items():
2312                 if lang not in available_subs:
2313                     available_subs[lang] = cap_info
2314
2315         if (not self.params.get('writesubtitles') and not
2316                 self.params.get('writeautomaticsub') or not
2317                 available_subs):
2318             return None
2319
2320         all_sub_langs = available_subs.keys()
2321         if self.params.get('allsubtitles', False):
2322             requested_langs = all_sub_langs
2323         elif self.params.get('subtitleslangs', False):
2324             requested_langs = set()
2325             for lang in self.params.get('subtitleslangs'):
2326                 if lang == 'all':
2327                     requested_langs.update(all_sub_langs)
2328                     continue
2329                 discard = lang[0] == '-'
2330                 if discard:
2331                     lang = lang[1:]
2332                 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2333                 if discard:
2334                     for lang in current_langs:
2335                         requested_langs.discard(lang)
2336                 else:
2337                     requested_langs.update(current_langs)
2338         elif 'en' in available_subs:
2339             requested_langs = ['en']
2340         else:
2341             requested_langs = [list(all_sub_langs)[0]]
2342         self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2343
2344         formats_query = self.params.get('subtitlesformat', 'best')
2345         formats_preference = formats_query.split('/') if formats_query else []
2346         subs = {}
2347         for lang in requested_langs:
2348             formats = available_subs.get(lang)
2349             if formats is None:
2350                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2351                 continue
2352             for ext in formats_preference:
2353                 if ext == 'best':
2354                     f = formats[-1]
2355                     break
2356                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2357                 if matches:
2358                     f = matches[-1]
2359                     break
2360             else:
2361                 f = formats[-1]
2362                 self.report_warning(
2363                     'No subtitle format found matching "%s" for language %s, '
2364                     'using %s' % (formats_query, lang, f['ext']))
2365             subs[lang] = f
2366         return subs
2367
2368     def __forced_printings(self, info_dict, filename, incomplete):
2369         def print_mandatory(field, actual_field=None):
2370             if actual_field is None:
2371                 actual_field = field
2372             if (self.params.get('force%s' % field, False)
2373                     and (not incomplete or info_dict.get(actual_field) is not None)):
2374                 self.to_stdout(info_dict[actual_field])
2375
2376         def print_optional(field):
2377             if (self.params.get('force%s' % field, False)
2378                     and info_dict.get(field) is not None):
2379                 self.to_stdout(info_dict[field])
2380
2381         info_dict = info_dict.copy()
2382         if filename is not None:
2383             info_dict['filename'] = filename
2384         if info_dict.get('requested_formats') is not None:
2385             # For RTMP URLs, also include the playpath
2386             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2387         elif 'url' in info_dict:
2388             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2389
2390         if self.params.get('forceprint') or self.params.get('forcejson'):
2391             self.post_extract(info_dict)
2392         for tmpl in self.params.get('forceprint', []):
2393             if re.match(r'\w+$', tmpl):
2394                 tmpl = '%({})s'.format(tmpl)
2395             tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2396             self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)
2397
2398         print_mandatory('title')
2399         print_mandatory('id')
2400         print_mandatory('url', 'urls')
2401         print_optional('thumbnail')
2402         print_optional('description')
2403         print_optional('filename')
2404         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2405             self.to_stdout(formatSeconds(info_dict['duration']))
2406         print_mandatory('format')
2407
2408         if self.params.get('forcejson'):
2409             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2410
2411     def dl(self, name, info, subtitle=False, test=False):
2412
2413         if test:
2414             verbose = self.params.get('verbose')
2415             params = {
2416                 'test': True,
2417                 'quiet': not verbose,
2418                 'verbose': verbose,
2419                 'noprogress': not verbose,
2420                 'nopart': True,
2421                 'skip_unavailable_fragments': False,
2422                 'keep_fragments': False,
2423                 'overwrites': True,
2424                 '_no_ytdl_file': True,
2425             }
2426         else:
2427             params = self.params
2428         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2429         if not test:
2430             for ph in self._progress_hooks:
2431                 fd.add_progress_hook(ph)
2432             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2433             self.write_debug('Invoking downloader on "%s"' % urls)
2434         new_info = dict(info)
2435         if new_info.get('http_headers') is None:
2436             new_info['http_headers'] = self._calc_headers(new_info)
2437         return fd.download(name, new_info, subtitle)
2438
2439     def process_info(self, info_dict):
2440         """Process a single resolved IE result."""
2441
2442         assert info_dict.get('_type', 'video') == 'video'
2443
2444         max_downloads = self.params.get('max_downloads')
2445         if max_downloads is not None:
2446             if self._num_downloads >= int(max_downloads):
2447                 raise MaxDownloadsReached()
2448
2449         # TODO: backward compatibility, to be removed
2450         info_dict['fulltitle'] = info_dict['title']
2451
2452         if 'format' not in info_dict and 'ext' in info_dict:
2453             info_dict['format'] = info_dict['ext']
2454
2455         if self._match_entry(info_dict) is not None:
2456             return
2457
2458         self.post_extract(info_dict)
2459         self._num_downloads += 1
2460
2461         # info_dict['_filename'] needs to be set for backward compatibility
2462         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2463         temp_filename = self.prepare_filename(info_dict, 'temp')
2464         files_to_move = {}
2465
2466         # Forced printings
2467         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2468
2469         if self.params.get('simulate'):
2470             if self.params.get('force_write_download_archive', False):
2471                 self.record_download_archive(info_dict)
2472
2473             # Do nothing else if in simulate mode
2474             return
2475
2476         if full_filename is None:
2477             return
2478
2479         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2480             return
2481         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2482             return
2483
2484         if self.params.get('writedescription', False):
2485             descfn = self.prepare_filename(info_dict, 'description')
2486             if not self._ensure_dir_exists(encodeFilename(descfn)):
2487                 return
2488             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2489                 self.to_screen('[info] Video description is already present')
2490             elif info_dict.get('description') is None:
2491                 self.report_warning('There\'s no description to write.')
2492             else:
2493                 try:
2494                     self.to_screen('[info] Writing video description to: ' + descfn)
2495                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2496                         descfile.write(info_dict['description'])
2497                 except (OSError, IOError):
2498                     self.report_error('Cannot write description file ' + descfn)
2499                     return
2500
2501         if self.params.get('writeannotations', False):
2502             annofn = self.prepare_filename(info_dict, 'annotation')
2503             if not self._ensure_dir_exists(encodeFilename(annofn)):
2504                 return
2505             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2506                 self.to_screen('[info] Video annotations are already present')
2507             elif not info_dict.get('annotations'):
2508                 self.report_warning('There are no annotations to write.')
2509             else:
2510                 try:
2511                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2512                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2513                         annofile.write(info_dict['annotations'])
2514                 except (KeyError, TypeError):
2515                     self.report_warning('There are no annotations to write.')
2516                 except (OSError, IOError):
2517                     self.report_error('Cannot write annotations file: ' + annofn)
2518                     return
2519
2520         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2521                                        self.params.get('writeautomaticsub')])
2522
2523         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2524             # subtitles download errors are already managed as troubles in relevant IE
2525             # that way it will silently go on when used with unsupporting IE
2526             subtitles = info_dict['requested_subtitles']
2527             # ie = self.get_info_extractor(info_dict['extractor_key'])
2528             for sub_lang, sub_info in subtitles.items():
2529                 sub_format = sub_info['ext']
2530                 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2531                 sub_filename_final = subtitles_filename(
2532                     self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2533                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2534                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2535                     sub_info['filepath'] = sub_filename
2536                     files_to_move[sub_filename] = sub_filename_final
2537                 else:
2538                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2539                     if sub_info.get('data') is not None:
2540                         try:
2541                             # Use newline='' to prevent conversion of newline characters
2542                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2543                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2544                                 subfile.write(sub_info['data'])
2545                             sub_info['filepath'] = sub_filename
2546                             files_to_move[sub_filename] = sub_filename_final
2547                         except (OSError, IOError):
2548                             self.report_error('Cannot write subtitles file ' + sub_filename)
2549                             return
2550                     else:
2551                         try:
2552                             self.dl(sub_filename, sub_info.copy(), subtitle=True)
2553                             sub_info['filepath'] = sub_filename
2554                             files_to_move[sub_filename] = sub_filename_final
2555                         except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2556                             self.report_warning('Unable to download subtitle for "%s": %s' %
2557                                                 (sub_lang, error_to_compat_str(err)))
2558                             continue
2559
2560         if self.params.get('writeinfojson', False):
2561             infofn = self.prepare_filename(info_dict, 'infojson')
2562             if not self._ensure_dir_exists(encodeFilename(infofn)):
2563                 return
2564             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2565                 self.to_screen('[info] Video metadata is already present')
2566             else:
2567                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2568                 try:
2569                     write_json_file(self.sanitize_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2570                 except (OSError, IOError):
2571                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2572                     return
2573             info_dict['__infojson_filename'] = infofn
2574
2575         for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2576             thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2577             thumb_filename = replace_extension(
2578                 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2579             files_to_move[thumb_filename_temp] = thumb_filename
2580
2581         # Write internet shortcut files
2582         url_link = webloc_link = desktop_link = False
2583         if self.params.get('writelink', False):
2584             if sys.platform == "darwin":  # macOS.
2585                 webloc_link = True
2586             elif sys.platform.startswith("linux"):
2587                 desktop_link = True
2588             else:  # if sys.platform in ['win32', 'cygwin']:
2589                 url_link = True
2590         if self.params.get('writeurllink', False):
2591             url_link = True
2592         if self.params.get('writewebloclink', False):
2593             webloc_link = True
2594         if self.params.get('writedesktoplink', False):
2595             desktop_link = True
2596
2597         if url_link or webloc_link or desktop_link:
2598             if 'webpage_url' not in info_dict:
2599                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2600                 return
2601             ascii_url = iri_to_uri(info_dict['webpage_url'])
2602
2603         def _write_link_file(extension, template, newline, embed_filename):
2604             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2605             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2606                 self.to_screen('[info] Internet shortcut is already present')
2607             else:
2608                 try:
2609                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2610                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2611                         template_vars = {'url': ascii_url}
2612                         if embed_filename:
2613                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2614                         linkfile.write(template % template_vars)
2615                 except (OSError, IOError):
2616                     self.report_error('Cannot write internet shortcut ' + linkfn)
2617                     return False
2618             return True
2619
2620         if url_link:
2621             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2622                 return
2623         if webloc_link:
2624             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2625                 return
2626         if desktop_link:
2627             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2628                 return
2629
2630         try:
2631             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2632         except PostProcessingError as err:
2633             self.report_error('Preprocessing: %s' % str(err))
2634             return
2635
2636         must_record_download_archive = False
2637         if self.params.get('skip_download', False):
2638             info_dict['filepath'] = temp_filename
2639             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2640             info_dict['__files_to_move'] = files_to_move
2641             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2642         else:
2643             # Download
2644             info_dict.setdefault('__postprocessors', [])
2645             try:
2646
2647                 def existing_file(*filepaths):
2648                     ext = info_dict.get('ext')
2649                     final_ext = self.params.get('final_ext', ext)
2650                     existing_files = []
2651                     for file in orderedSet(filepaths):
2652                         if final_ext != ext:
2653                             converted = replace_extension(file, final_ext, ext)
2654                             if os.path.exists(encodeFilename(converted)):
2655                                 existing_files.append(converted)
2656                         if os.path.exists(encodeFilename(file)):
2657                             existing_files.append(file)
2658
2659                     if not existing_files or self.params.get('overwrites', False):
2660                         for file in orderedSet(existing_files):
2661                             self.report_file_delete(file)
2662                             os.remove(encodeFilename(file))
2663                         return None
2664
2665                     self.report_file_already_downloaded(existing_files[0])
2666                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2667                     return existing_files[0]
2668
2669                 success = True
2670                 if info_dict.get('requested_formats') is not None:
2671
2672                     def compatible_formats(formats):
2673                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2674                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2675                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2676                         if len(video_formats) > 2 or len(audio_formats) > 2:
2677                             return False
2678
2679                         # Check extension
2680                         exts = set(format.get('ext') for format in formats)
2681                         COMPATIBLE_EXTS = (
2682                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2683                             set(('webm',)),
2684                         )
2685                         for ext_sets in COMPATIBLE_EXTS:
2686                             if ext_sets.issuperset(exts):
2687                                 return True
2688                         # TODO: Check acodec/vcodec
2689                         return False
2690
2691                     requested_formats = info_dict['requested_formats']
2692                     old_ext = info_dict['ext']
2693                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2694                         info_dict['ext'] = 'mkv'
2695                         self.report_warning(
2696                             'Requested formats are incompatible for merge and will be merged into mkv.')
2697                     new_ext = info_dict['ext']
2698
2699                     def correct_ext(filename, ext=new_ext):
2700                         if filename == '-':
2701                             return filename
2702                         filename_real_ext = os.path.splitext(filename)[1][1:]
2703                         filename_wo_ext = (
2704                             os.path.splitext(filename)[0]
2705                             if filename_real_ext in (old_ext, new_ext)
2706                             else filename)
2707                         return '%s.%s' % (filename_wo_ext, ext)
2708
2709                     # Ensure filename always has a correct extension for successful merge
2710                     full_filename = correct_ext(full_filename)
2711                     temp_filename = correct_ext(temp_filename)
2712                     dl_filename = existing_file(full_filename, temp_filename)
2713                     info_dict['__real_download'] = False
2714
2715                     _protocols = set(determine_protocol(f) for f in requested_formats)
2716                     if len(_protocols) == 1:  # All requested formats have same protocol
2717                         info_dict['protocol'] = _protocols.pop()
2718                     directly_mergable = FFmpegFD.can_merge_formats(info_dict)
2719                     if dl_filename is not None:
2720                         pass
2721                     elif (directly_mergable and get_suitable_downloader(
2722                             info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
2723                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2724                         success, real_download = self.dl(temp_filename, info_dict)
2725                         info_dict['__real_download'] = real_download
2726                     else:
2727                         downloaded = []
2728                         merger = FFmpegMergerPP(self)
2729                         if self.params.get('allow_unplayable_formats'):
2730                             self.report_warning(
2731                                 'You have requested merging of multiple formats '
2732                                 'while also allowing unplayable formats to be downloaded. '
2733                                 'The formats won\'t be merged to prevent data corruption.')
2734                         elif not merger.available:
2735                             self.report_warning(
2736                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2737                                 'The formats won\'t be merged.')
2738
2739                         if temp_filename == '-':
2740                             reason = ('using a downloader other than ffmpeg' if directly_mergable
2741                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2742                                       else 'but ffmpeg is not installed')
2743                             self.report_warning(
2744                                 f'You have requested downloading multiple formats to stdout {reason}. '
2745                                 'The formats will be streamed one after the other')
2746                             fname = temp_filename
2747                         for f in requested_formats:
2748                             new_info = dict(info_dict)
2749                             del new_info['requested_formats']
2750                             new_info.update(f)
2751                             if temp_filename != '-':
2752                                 fname = prepend_extension(
2753                                     correct_ext(temp_filename, new_info['ext']),
2754                                     'f%s' % f['format_id'], new_info['ext'])
2755                                 if not self._ensure_dir_exists(fname):
2756                                     return
2757                                 downloaded.append(fname)
2758                             partial_success, real_download = self.dl(fname, new_info)
2759                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2760                             success = success and partial_success
2761                         if merger.available and not self.params.get('allow_unplayable_formats'):
2762                             info_dict['__postprocessors'].append(merger)
2763                             info_dict['__files_to_merge'] = downloaded
2764                             # Even if there were no downloads, it is being merged only now
2765                             info_dict['__real_download'] = True
2766                         else:
2767                             for file in downloaded:
2768                                 files_to_move[file] = None
2769                 else:
2770                     # Just a single file
2771                     dl_filename = existing_file(full_filename, temp_filename)
2772                     if dl_filename is None:
2773                         success, real_download = self.dl(temp_filename, info_dict)
2774                         info_dict['__real_download'] = real_download
2775
2776                 dl_filename = dl_filename or temp_filename
2777                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2778
2779             except network_exceptions as err:
2780                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2781                 return
2782             except (OSError, IOError) as err:
2783                 raise UnavailableVideoError(err)
2784             except (ContentTooShortError, ) as err:
2785                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2786                 return
2787
2788             if success and full_filename != '-':
2789
2790                 def fixup():
2791                     do_fixup = True
2792                     fixup_policy = self.params.get('fixup')
2793                     vid = info_dict['id']
2794
2795                     if fixup_policy in ('ignore', 'never'):
2796                         return
2797                     elif fixup_policy == 'warn':
2798                         do_fixup = False
2799                     elif fixup_policy != 'force':
2800                         assert fixup_policy in ('detect_or_warn', None)
2801                         if not info_dict.get('__real_download'):
2802                             do_fixup = False
2803
2804                     def ffmpeg_fixup(cndn, msg, cls):
2805                         if not cndn:
2806                             return
2807                         if not do_fixup:
2808                             self.report_warning(f'{vid}: {msg}')
2809                             return
2810                         pp = cls(self)
2811                         if pp.available:
2812                             info_dict['__postprocessors'].append(pp)
2813                         else:
2814                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2815
2816                     stretched_ratio = info_dict.get('stretched_ratio')
2817                     ffmpeg_fixup(
2818                         stretched_ratio not in (1, None),
2819                         f'Non-uniform pixel ratio {stretched_ratio}',
2820                         FFmpegFixupStretchedPP)
2821
2822                     ffmpeg_fixup(
2823                         (info_dict.get('requested_formats') is None
2824                          and info_dict.get('container') == 'm4a_dash'
2825                          and info_dict.get('ext') == 'm4a'),
2826                         'writing DASH m4a. Only some players support this container',
2827                         FFmpegFixupM4aPP)
2828
2829                     downloader = (get_suitable_downloader(info_dict, self.params).__name__
2830                                   if 'protocol' in info_dict else None)
2831                     ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2832                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2833                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2834
2835                 fixup()
2836                 try:
2837                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2838                 except PostProcessingError as err:
2839                     self.report_error('Postprocessing: %s' % str(err))
2840                     return
2841                 try:
2842                     for ph in self._post_hooks:
2843                         ph(info_dict['filepath'])
2844                 except Exception as err:
2845                     self.report_error('post hooks: %s' % str(err))
2846                     return
2847                 must_record_download_archive = True
2848
2849         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2850             self.record_download_archive(info_dict)
2851         max_downloads = self.params.get('max_downloads')
2852         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2853             raise MaxDownloadsReached()
2854
2855     def download(self, url_list):
2856         """Download a given list of URLs."""
2857         outtmpl = self.outtmpl_dict['default']
2858         if (len(url_list) > 1
2859                 and outtmpl != '-'
2860                 and '%' not in outtmpl
2861                 and self.params.get('max_downloads') != 1):
2862             raise SameFileError(outtmpl)
2863
2864         for url in url_list:
2865             try:
2866                 # It also downloads the videos
2867                 res = self.extract_info(
2868                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2869             except UnavailableVideoError:
2870                 self.report_error('unable to download video')
2871             except MaxDownloadsReached:
2872                 self.to_screen('[info] Maximum number of downloaded files reached')
2873                 raise
2874             except ExistingVideoReached:
2875                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2876                 raise
2877             except RejectedVideoReached:
2878                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2879                 raise
2880             else:
2881                 if self.params.get('dump_single_json', False):
2882                     self.post_extract(res)
2883                     self.to_stdout(json.dumps(self.sanitize_info(res)))
2884
2885         return self._download_retcode
2886
2887     def download_with_info_file(self, info_filename):
2888         with contextlib.closing(fileinput.FileInput(
2889                 [info_filename], mode='r',
2890                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2891             # FileInput doesn't have a read method, we can't call json.load
2892             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2893         try:
2894             self.process_ie_result(info, download=True)
2895         except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2896             webpage_url = info.get('webpage_url')
2897             if webpage_url is not None:
2898                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2899                 return self.download([webpage_url])
2900             else:
2901                 raise
2902         return self._download_retcode
2903
2904     @staticmethod
2905     def sanitize_info(info_dict, remove_private_keys=False):
2906         ''' Sanitize the infodict for converting to json '''
2907         info_dict.setdefault('epoch', int(time.time()))
2908         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
2909         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
2910         if remove_private_keys:
2911             remove_keys |= {
2912                 'requested_formats', 'requested_subtitles', 'requested_entries',
2913                 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2914             }
2915             empty_values = (None, {}, [], set(), tuple())
2916             reject = lambda k, v: k not in keep_keys and (
2917                 k.startswith('_') or k in remove_keys or v in empty_values)
2918         else:
2919             reject = lambda k, v: k in remove_keys
2920         filter_fn = lambda obj: (
2921             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2922             else obj if not isinstance(obj, dict)
2923             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2924         return filter_fn(info_dict)
2925
2926     @staticmethod
2927     def filter_requested_info(info_dict, actually_filter=True):
2928         ''' Alias of sanitize_info for backward compatibility '''
2929         return YoutubeDL.sanitize_info(info_dict, actually_filter)
2930
2931     def run_pp(self, pp, infodict):
2932         files_to_delete = []
2933         if '__files_to_move' not in infodict:
2934             infodict['__files_to_move'] = {}
2935         files_to_delete, infodict = pp.run(infodict)
2936         if not files_to_delete:
2937             return infodict
2938
2939         if self.params.get('keepvideo', False):
2940             for f in files_to_delete:
2941                 infodict['__files_to_move'].setdefault(f, '')
2942         else:
2943             for old_filename in set(files_to_delete):
2944                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2945                 try:
2946                     os.remove(encodeFilename(old_filename))
2947                 except (IOError, OSError):
2948                     self.report_warning('Unable to remove downloaded original file')
2949                 if old_filename in infodict['__files_to_move']:
2950                     del infodict['__files_to_move'][old_filename]
2951         return infodict
2952
2953     @staticmethod
2954     def post_extract(info_dict):
2955         def actual_post_extract(info_dict):
2956             if info_dict.get('_type') in ('playlist', 'multi_video'):
2957                 for video_dict in info_dict.get('entries', {}):
2958                     actual_post_extract(video_dict or {})
2959                 return
2960
2961             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2962             extra = post_extractor().items()
2963             info_dict.update(extra)
2964             info_dict.pop('__post_extractor', None)
2965
2966             original_infodict = info_dict.get('__original_infodict') or {}
2967             original_infodict.update(extra)
2968             original_infodict.pop('__post_extractor', None)
2969
2970         actual_post_extract(info_dict or {})
2971
2972     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2973         info = dict(ie_info)
2974         info['__files_to_move'] = files_to_move or {}
2975         for pp in self._pps[key]:
2976             info = self.run_pp(pp, info)
2977         return info, info.pop('__files_to_move', None)
2978
2979     def post_process(self, filename, ie_info, files_to_move=None):
2980         """Run all the postprocessors on the given file."""
2981         info = dict(ie_info)
2982         info['filepath'] = filename
2983         info['__files_to_move'] = files_to_move or {}
2984
2985         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2986             info = self.run_pp(pp, info)
2987         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2988         del info['__files_to_move']
2989         for pp in self._pps['after_move']:
2990             info = self.run_pp(pp, info)
2991         return info
2992
2993     def _make_archive_id(self, info_dict):
2994         video_id = info_dict.get('id')
2995         if not video_id:
2996             return
2997         # Future-proof against any change in case
2998         # and backwards compatibility with prior versions
2999         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3000         if extractor is None:
3001             url = str_or_none(info_dict.get('url'))
3002             if not url:
3003                 return
3004             # Try to find matching extractor for the URL and take its ie_key
3005             for ie in self._ies:
3006                 if ie.suitable(url):
3007                     extractor = ie.ie_key()
3008                     break
3009             else:
3010                 return
3011         return '%s %s' % (extractor.lower(), video_id)
3012
3013     def in_download_archive(self, info_dict):
3014         fn = self.params.get('download_archive')
3015         if fn is None:
3016             return False
3017
3018         vid_id = self._make_archive_id(info_dict)
3019         if not vid_id:
3020             return False  # Incomplete video information
3021
3022         return vid_id in self.archive
3023
3024     def record_download_archive(self, info_dict):
3025         fn = self.params.get('download_archive')
3026         if fn is None:
3027             return
3028         vid_id = self._make_archive_id(info_dict)
3029         assert vid_id
3030         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3031             archive_file.write(vid_id + '\n')
3032         self.archive.add(vid_id)
3033
3034     @staticmethod
3035     def format_resolution(format, default='unknown'):
3036         if format.get('vcodec') == 'none':
3037             if format.get('acodec') == 'none':
3038                 return 'images'
3039             return 'audio only'
3040         if format.get('resolution') is not None:
3041             return format['resolution']
3042         if format.get('width') and format.get('height'):
3043             res = '%dx%d' % (format['width'], format['height'])
3044         elif format.get('height'):
3045             res = '%sp' % format['height']
3046         elif format.get('width'):
3047             res = '%dx?' % format['width']
3048         else:
3049             res = default
3050         return res
3051
3052     def _format_note(self, fdict):
3053         res = ''
3054         if fdict.get('ext') in ['f4f', 'f4m']:
3055             res += '(unsupported) '
3056         if fdict.get('language'):
3057             if res:
3058                 res += ' '
3059             res += '[%s] ' % fdict['language']
3060         if fdict.get('format_note') is not None:
3061             res += fdict['format_note'] + ' '
3062         if fdict.get('tbr') is not None:
3063             res += '%4dk ' % fdict['tbr']
3064         if fdict.get('container') is not None:
3065             if res:
3066                 res += ', '
3067             res += '%s container' % fdict['container']
3068         if (fdict.get('vcodec') is not None
3069                 and fdict.get('vcodec') != 'none'):
3070             if res:
3071                 res += ', '
3072             res += fdict['vcodec']
3073             if fdict.get('vbr') is not None:
3074                 res += '@'
3075         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3076             res += 'video@'
3077         if fdict.get('vbr') is not None:
3078             res += '%4dk' % fdict['vbr']
3079         if fdict.get('fps') is not None:
3080             if res:
3081                 res += ', '
3082             res += '%sfps' % fdict['fps']
3083         if fdict.get('acodec') is not None:
3084             if res:
3085                 res += ', '
3086             if fdict['acodec'] == 'none':
3087                 res += 'video only'
3088             else:
3089                 res += '%-5s' % fdict['acodec']
3090         elif fdict.get('abr') is not None:
3091             if res:
3092                 res += ', '
3093             res += 'audio'
3094         if fdict.get('abr') is not None:
3095             res += '@%3dk' % fdict['abr']
3096         if fdict.get('asr') is not None:
3097             res += ' (%5dHz)' % fdict['asr']
3098         if fdict.get('filesize') is not None:
3099             if res:
3100                 res += ', '
3101             res += format_bytes(fdict['filesize'])
3102         elif fdict.get('filesize_approx') is not None:
3103             if res:
3104                 res += ', '
3105             res += '~' + format_bytes(fdict['filesize_approx'])
3106         return res
3107
3108     def list_formats(self, info_dict):
3109         formats = info_dict.get('formats', [info_dict])
3110         new_format = (
3111             'list-formats' not in self.params.get('compat_opts', [])
3112             and self.params.get('listformats_table', True) is not False)
3113         if new_format:
3114             table = [
3115                 [
3116                     format_field(f, 'format_id'),
3117                     format_field(f, 'ext'),
3118                     self.format_resolution(f),
3119                     format_field(f, 'fps', '%d'),
3120                     '|',
3121                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3122                     format_field(f, 'tbr', '%4dk'),
3123                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3124                     '|',
3125                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3126                     format_field(f, 'vbr', '%4dk'),
3127                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3128                     format_field(f, 'abr', '%3dk'),
3129                     format_field(f, 'asr', '%5dHz'),
3130                     ', '.join(filter(None, (
3131                         'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3132                         format_field(f, 'language', '[%s]'),
3133                         format_field(f, 'format_note'),
3134                         format_field(f, 'container', ignore=(None, f.get('ext'))),
3135                     ))),
3136                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3137             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
3138                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
3139         else:
3140             table = [
3141                 [
3142                     format_field(f, 'format_id'),
3143                     format_field(f, 'ext'),
3144                     self.format_resolution(f),
3145                     self._format_note(f)]
3146                 for f in formats
3147                 if f.get('preference') is None or f['preference'] >= -1000]
3148             header_line = ['format code', 'extension', 'resolution', 'note']
3149
3150         self.to_screen(
3151             '[info] Available formats for %s:' % info_dict['id'])
3152         self.to_stdout(render_table(
3153             header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3154
3155     def list_thumbnails(self, info_dict):
3156         thumbnails = list(info_dict.get('thumbnails'))
3157         if not thumbnails:
3158             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3159             return
3160
3161         self.to_screen(
3162             '[info] Thumbnails for %s:' % info_dict['id'])
3163         self.to_stdout(render_table(
3164             ['ID', 'width', 'height', 'URL'],
3165             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3166
3167     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3168         if not subtitles:
3169             self.to_screen('%s has no %s' % (video_id, name))
3170             return
3171         self.to_screen(
3172             'Available %s for %s:' % (name, video_id))
3173
3174         def _row(lang, formats):
3175             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3176             if len(set(names)) == 1:
3177                 names = [] if names[0] == 'unknown' else names[:1]
3178             return [lang, ', '.join(names), ', '.join(exts)]
3179
3180         self.to_stdout(render_table(
3181             ['Language', 'Name', 'Formats'],
3182             [_row(lang, formats) for lang, formats in subtitles.items()],
3183             hideEmpty=True))
3184
3185     def urlopen(self, req):
3186         """ Start an HTTP download """
3187         if isinstance(req, compat_basestring):
3188             req = sanitized_Request(req)
3189         return self._opener.open(req, timeout=self._socket_timeout)
3190
3191     def print_debug_header(self):
3192         if not self.params.get('verbose'):
3193             return
3194
3195         if type('') is not compat_str:
3196             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
3197             self.report_warning(
3198                 'Your Python is broken! Update to a newer and supported version')
3199
3200         stdout_encoding = getattr(
3201             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
3202         encoding_str = (
3203             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3204                 locale.getpreferredencoding(),
3205                 sys.getfilesystemencoding(),
3206                 stdout_encoding,
3207                 self.get_encoding()))
3208         write_string(encoding_str, encoding=None)
3209
3210         source = (
3211             '(exe)' if hasattr(sys, 'frozen')
3212             else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
3213             else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
3214             else '')
3215         self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
3216         if _LAZY_LOADER:
3217             self._write_string('[debug] Lazy loading extractors enabled\n')
3218         if _PLUGIN_CLASSES:
3219             self._write_string(
3220                 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
3221         if self.params.get('compat_opts'):
3222             self._write_string(
3223                 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3224         try:
3225             sp = subprocess.Popen(
3226                 ['git', 'rev-parse', '--short', 'HEAD'],
3227                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3228                 cwd=os.path.dirname(os.path.abspath(__file__)))
3229             out, err = process_communicate_or_kill(sp)
3230             out = out.decode().strip()
3231             if re.match('[0-9a-f]+', out):
3232                 self._write_string('[debug] Git HEAD: %s\n' % out)
3233         except Exception:
3234             try:
3235                 sys.exc_clear()
3236             except Exception:
3237                 pass
3238
3239         def python_implementation():
3240             impl_name = platform.python_implementation()
3241             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3242                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3243             return impl_name
3244
3245         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3246             platform.python_version(),
3247             python_implementation(),
3248             platform.architecture()[0],
3249             platform_name()))
3250
3251         exe_versions = FFmpegPostProcessor.get_versions(self)
3252         exe_versions['rtmpdump'] = rtmpdump_version()
3253         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3254         exe_str = ', '.join(
3255             '%s %s' % (exe, v)
3256             for exe, v in sorted(exe_versions.items())
3257             if v
3258         )
3259         if not exe_str:
3260             exe_str = 'none'
3261         self._write_string('[debug] exe versions: %s\n' % exe_str)
3262
3263         proxy_map = {}
3264         for handler in self._opener.handlers:
3265             if hasattr(handler, 'proxies'):
3266                 proxy_map.update(handler.proxies)
3267         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
3268
3269         if self.params.get('call_home', False):
3270             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3271             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
3272             return
3273             latest_version = self.urlopen(
3274                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3275             if version_tuple(latest_version) > version_tuple(__version__):
3276                 self.report_warning(
3277                     'You are using an outdated version (newest version: %s)! '
3278                     'See https://yt-dl.org/update if you need help updating.' %
3279                     latest_version)
3280
3281     def _setup_opener(self):
3282         timeout_val = self.params.get('socket_timeout')
3283         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3284
3285         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3286         opts_cookiefile = self.params.get('cookiefile')
3287         opts_proxy = self.params.get('proxy')
3288
3289         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3290
3291         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3292         if opts_proxy is not None:
3293             if opts_proxy == '':
3294                 proxies = {}
3295             else:
3296                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3297         else:
3298             proxies = compat_urllib_request.getproxies()
3299             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3300             if 'http' in proxies and 'https' not in proxies:
3301                 proxies['https'] = proxies['http']
3302         proxy_handler = PerRequestProxyHandler(proxies)
3303
3304         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3305         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3306         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3307         redirect_handler = YoutubeDLRedirectHandler()
3308         data_handler = compat_urllib_request_DataHandler()
3309
3310         # When passing our own FileHandler instance, build_opener won't add the
3311         # default FileHandler and allows us to disable the file protocol, which
3312         # can be used for malicious purposes (see
3313         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3314         file_handler = compat_urllib_request.FileHandler()
3315
3316         def file_open(*args, **kwargs):
3317             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3318         file_handler.file_open = file_open
3319
3320         opener = compat_urllib_request.build_opener(
3321             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3322
3323         # Delete the default user-agent header, which would otherwise apply in
3324         # cases where our custom HTTP handler doesn't come into play
3325         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3326         opener.addheaders = []
3327         self._opener = opener
3328
3329     def encode(self, s):
3330         if isinstance(s, bytes):
3331             return s  # Already encoded
3332
3333         try:
3334             return s.encode(self.get_encoding())
3335         except UnicodeEncodeError as err:
3336             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3337             raise
3338
3339     def get_encoding(self):
3340         encoding = self.params.get('encoding')
3341         if encoding is None:
3342             encoding = preferredencoding()
3343         return encoding
3344
3345     def _write_thumbnails(self, info_dict, filename):  # return the extensions
3346         write_all = self.params.get('write_all_thumbnails', False)
3347         thumbnails = []
3348         if write_all or self.params.get('writethumbnail', False):
3349             thumbnails = info_dict.get('thumbnails') or []
3350         multiple = write_all and len(thumbnails) > 1
3351
3352         ret = []
3353         for t in thumbnails[::-1]:
3354             thumb_ext = determine_ext(t['url'], 'jpg')
3355             suffix = '%s.' % t['id'] if multiple else ''
3356             thumb_display_id = '%s ' % t['id'] if multiple else ''
3357             thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3358
3359             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3360                 ret.append(suffix + thumb_ext)
3361                 t['filepath'] = thumb_filename
3362                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3363                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3364             else:
3365                 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3366                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3367                 try:
3368                     uf = self.urlopen(t['url'])
3369                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3370                         shutil.copyfileobj(uf, thumbf)
3371                     ret.append(suffix + thumb_ext)
3372                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3373                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3374                     t['filepath'] = thumb_filename
3375                 except network_exceptions as err:
3376                     self.report_warning('Unable to download thumbnail "%s": %s' %
3377                                         (t['url'], error_to_compat_str(err)))
3378             if ret and not write_all:
3379                 break
3380         return ret