yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30 from zipimport import zipimporter
  31
  32 from .compat import (
  33     compat_basestring,
  34     compat_get_terminal_size,
  35     compat_kwargs,
  36     compat_numeric_types,
  37     compat_os_name,
  38     compat_shlex_quote,
  39     compat_str,
  40     compat_tokenize_tokenize,
  41     compat_urllib_error,
  42     compat_urllib_request,
  43     compat_urllib_request_DataHandler,
  44 )
  45 from .cookies import load_cookies
  46 from .utils import (
  47     age_restricted,
  48     args_to_str,
  49     ContentTooShortError,
  50     date_from_str,
  51     DateRange,
  52     DEFAULT_OUTTMPL,
  53     determine_ext,
  54     determine_protocol,
  55     DOT_DESKTOP_LINK_TEMPLATE,
  56     DOT_URL_LINK_TEMPLATE,
  57     DOT_WEBLOC_LINK_TEMPLATE,
  58     DownloadError,
  59     encode_compat_str,
  60     encodeFilename,
  61     EntryNotInPlaylist,
  62     error_to_compat_str,
  63     ExistingVideoReached,
  64     expand_path,
  65     ExtractorError,
  66     float_or_none,
  67     format_bytes,
  68     format_field,
  69     STR_FORMAT_RE_TMPL,
  70     STR_FORMAT_TYPES,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     HEADRequest,
  74     int_or_none,
  75     iri_to_uri,
  76     ISO3166Utils,
  77     LazyList,
  78     locked_file,
  79     make_dir,
  80     make_HTTPS_handler,
  81     MaxDownloadsReached,
  82     network_exceptions,
  83     orderedSet,
  84     OUTTMPL_TYPES,
  85     PagedList,
  86     parse_filesize,
  87     PerRequestProxyHandler,
  88     platform_name,
  89     PostProcessingError,
  90     preferredencoding,
  91     prepend_extension,
  92     process_communicate_or_kill,
  93     register_socks_protocols,
  94     RejectedVideoReached,
  95     render_table,
  96     replace_extension,
  97     SameFileError,
  98     sanitize_filename,
  99     sanitize_path,
 100     sanitize_url,
 101     sanitized_Request,
 102     std_headers,
 103     str_or_none,
 104     strftime_or_none,
 105     subtitles_filename,
 106     ThrottledDownload,
 107     to_high_limit_path,
 108     traverse_obj,
 109     try_get,
 110     UnavailableVideoError,
 111     url_basename,
 112     variadic,
 113     version_tuple,
 114     write_json_file,
 115     write_string,
 116     YoutubeDLCookieProcessor,
 117     YoutubeDLHandler,
 118     YoutubeDLRedirectHandler,
 119 )
 120 from .cache import Cache
 121 from .extractor import (
 122     gen_extractor_classes,
 123     get_info_extractor,
 124     _LAZY_LOADER,
 125     _PLUGIN_CLASSES
 126 )
 127 from .extractor.openload import PhantomJSwrapper
 128 from .downloader import (
 129     get_suitable_downloader,
 130     shorten_protocol_name
 131 )
 132 from .downloader.rtmp import rtmpdump_version
 133 from .postprocessor import (
 134     get_postprocessor,
 135     FFmpegFixupDurationPP,
 136     FFmpegFixupM3u8PP,
 137     FFmpegFixupM4aPP,
 138     FFmpegFixupStretchedPP,
 139     FFmpegFixupTimestampPP,
 140     FFmpegMergerPP,
 141     FFmpegPostProcessor,
 142     MoveFilesAfterDownloadPP,
 143 )
 144 from .version import __version__
 145
 146 if compat_os_name == 'nt':
 147     import ctypes
 148
 149
 150 class YoutubeDL(object):
 151     """YoutubeDL class.
 152
  153     YoutubeDL objects are the ones responsible for downloading the
  154     actual video file and writing it to disk if the user has requested
  155     it, among some other tasks. In most cases there should be one per
  156     program. Since, given a video URL, the downloader doesn't know how to
  157     extract all the needed information (a task that InfoExtractors do), it
  158     has to pass the URL to one of them.
 159
  160     For this, YoutubeDL objects have a method that allows
  161     InfoExtractors to be registered in a given order. When it is passed
  162     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  163     finds that reports being able to handle it. The InfoExtractor extracts
  164     all the information about the video or videos the URL refers to, and
  165     YoutubeDL processes the extracted information, possibly using a File
  166     Downloader to download the video.
 167
 168     YoutubeDL objects accept a lot of parameters. In order not to saturate
 169     the object constructor with arguments, it receives a dictionary of
 170     options instead. These options are available through the params
 171     attribute for the InfoExtractors to use. The YoutubeDL also
 172     registers itself as the downloader in charge for the InfoExtractors
 173     that are added to it, so this is a "mutual registration".
 174
 175     Available options:
 176
 177     username:          Username for authentication purposes.
 178     password:          Password for authentication purposes.
 179     videopassword:     Password for accessing a video.
 180     ap_mso:            Adobe Pass multiple-system operator identifier.
 181     ap_username:       Multiple-system operator account username.
 182     ap_password:       Multiple-system operator account password.
 183     usenetrc:          Use netrc for authentication instead.
 184     verbose:           Print additional info to stdout.
 185     quiet:             Do not print messages to stdout.
 186     no_warnings:       Do not print out anything for warnings.
 187     forceprint:        A list of templates to force print
 188     forceurl:          Force printing final URL. (Deprecated)
 189     forcetitle:        Force printing title. (Deprecated)
 190     forceid:           Force printing ID. (Deprecated)
 191     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 192     forcedescription:  Force printing description. (Deprecated)
 193     forcefilename:     Force printing final filename. (Deprecated)
 194     forceduration:     Force printing duration. (Deprecated)
 195     forcejson:         Force printing info_dict as JSON.
 196     dump_single_json:  Force printing the info_dict of the whole playlist
 197                        (or video) as a single JSON line.
 198     force_write_download_archive: Force writing download archive regardless
 199                        of 'skip_download' or 'simulate'.
 200     simulate:          Do not download the video files.
 201     format:            Video format code. see "FORMAT SELECTION" for more details.
 202     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
  203     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 204                        extracting metadata even if the video is not actually
 205                        available for download (experimental)
 206     format_sort:       How to sort the video formats. see "Sorting Formats"
 207                        for more details.
 208     format_sort_force: Force the given format_sort. see "Sorting Formats"
 209                        for more details.
 210     allow_multiple_video_streams:   Allow multiple video streams to be merged
 211                        into a single file
 212     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 213                        into a single file
  214     check_formats:     Whether to test if the formats are downloadable.
 215                        Can be True (check all), False (check none)
 216                        or None (check only if requested by extractor)
  217     paths:             Dictionary of output paths. The allowed keys are 'home',
 218                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 219     outtmpl:           Dictionary of templates for output names. Allowed keys
 220                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
  221                        A string is also accepted for backward compatibility
 222     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 223     restrictfilenames: Do not allow "&" and spaces in file names
 224     trim_file_name:    Limit length of filename (extension excluded)
 225     windowsfilenames:  Force the filenames to be windows compatible
 226     ignoreerrors:      Do not stop on download errors
 227                        (Default True when running yt-dlp,
 228                        but False when directly accessing YoutubeDL class)
 229     skip_playlist_after_errors: Number of allowed failures until the rest of
 230                        the playlist is skipped
 231     force_generic_extractor: Force downloader to use the generic extractor
 232     overwrites:        Overwrite all video and metadata files if True,
 233                        overwrite only non-video files if None
 234                        and don't overwrite any file if False
 235     playliststart:     Playlist item to start at.
 236     playlistend:       Playlist item to end at.
 237     playlist_items:    Specific indices of playlist to download.
 238     playlistreverse:   Download playlist items in reverse order.
 239     playlistrandom:    Download playlist items in random order.
 240     matchtitle:        Download only matching titles.
 241     rejecttitle:       Reject downloads for matching titles.
 242     logger:            Log messages to a logging.Logger instance.
 243     logtostderr:       Log messages to stderr instead of stdout.
 244     writedescription:  Write the video description to a .description file
  245     writeinfojson:     Write the video metadata to a .info.json file
 246     clean_infojson:    Remove private fields from the infojson
 247     writecomments:     Extract video comments. This will not be written to disk
 248                        unless writeinfojson is also given
 249     writeannotations:  Write the video annotations to a .annotations.xml file
 250     writethumbnail:    Write the thumbnail image to a file
 251     allow_playlist_files: Whether to write playlists' description, infojson etc
 252                        also to disk when using the 'write*' options
 253     write_all_thumbnails:  Write all thumbnail formats to files
 254     writelink:         Write an internet shortcut file, depending on the
 255                        current platform (.url/.webloc/.desktop)
 256     writeurllink:      Write a Windows internet shortcut file (.url)
 257     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 258     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 259     writesubtitles:    Write the video subtitles to a file
 260     writeautomaticsub: Write the automatically generated subtitles to a file
 261     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 262                        Downloads all the subtitles of the video
 263                        (requires writesubtitles or writeautomaticsub)
 264     listsubtitles:     Lists all available subtitles for the video
 265     subtitlesformat:   The format code for subtitles
 266     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 267                        The list may contain "all" to refer to all the available
 268                        subtitles. The language can be prefixed with a "-" to
 269                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 270     keepvideo:         Keep the video file after post-processing
 271     daterange:         A DateRange object, download only if the upload_date is in the range.
 272     skip_download:     Skip the actual download of the video file
 273     cachedir:          Location of the cache files in the filesystem.
 274                        False to disable filesystem cache.
 275     noplaylist:        Download single video instead of a playlist if in doubt.
 276     age_limit:         An integer representing the user's age in years.
 277                        Unsuitable videos for the given age are skipped.
 278     min_views:         An integer representing the minimum view count the video
 279                        must have in order to not be skipped.
 280                        Videos without view count information are always
 281                        downloaded. None for no limit.
 282     max_views:         An integer representing the maximum view count.
 283                        Videos that are more popular than that are not
 284                        downloaded.
 285                        Videos without view count information are always
 286                        downloaded. None for no limit.
 287     download_archive:  File name of a file where all downloads are recorded.
 288                        Videos already present in the file are not downloaded
 289                        again.
 290     break_on_existing: Stop the download process after attempting to download a
 291                        file that is in the archive.
 292     break_on_reject:   Stop the download process when encountering a video that
 293                        has been filtered out.
 294     cookiefile:        File name where cookies should be read from and dumped to
 295     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 296                        name/path from where cookies are loaded.
  297                        Eg: ('chrome', ) or ('vivaldi', 'default')
 298     nocheckcertificate:Do not verify SSL certificates
 299     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 300                        At the moment, this is only supported by YouTube.
 301     proxy:             URL of the proxy server to use
 302     geo_verification_proxy:  URL of the proxy to use for IP address verification
 303                        on geo-restricted sites.
 304     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 305     bidi_workaround:   Work around buggy terminals without bidirectional text
  306                        support, using bidiv or fribidi
 307     debug_printtraffic:Print out sent and received HTTP traffic
 308     include_ads:       Download ads as well
 309     default_search:    Prepend this string if an input url is not valid.
 310                        'auto' for elaborate guessing
 311     encoding:          Use this encoding instead of the system-specified.
 312     extract_flat:      Do not resolve URLs, return the immediate result.
 313                        Pass in 'in_playlist' to only show this behavior for
 314                        playlist items.
 315     postprocessors:    A list of dictionaries, each with an entry
 316                        * key:  The name of the postprocessor. See
 317                                yt_dlp/postprocessor/__init__.py for a list.
 318                        * when: When to run the postprocessor. Can be one of
 319                                pre_process|before_dl|post_process|after_move.
 320                                Assumed to be 'post_process' if not given
 321     post_hooks:        A list of functions that get called as the final step
 322                        for each video file, after all postprocessors have been
 323                        called. The filename will be passed as the only argument.
 324     progress_hooks:    A list of functions that get called on download
 325                        progress, with a dictionary with the entries
 326                        * status: One of "downloading", "error", or "finished".
 327                                  Check this first and ignore unknown values.
 328                        * info_dict: The extracted info_dict
 329
 330                        If status is one of "downloading", or "finished", the
 331                        following properties may also be present:
 332                        * filename: The final filename (always present)
 333                        * tmpfilename: The filename we're currently writing to
 334                        * downloaded_bytes: Bytes on disk
 335                        * total_bytes: Size of the whole file, None if unknown
 336                        * total_bytes_estimate: Guess of the eventual file size,
 337                                                None if unavailable.
 338                        * elapsed: The number of seconds since download started.
 339                        * eta: The estimated time in seconds, None if unknown
 340                        * speed: The download speed in bytes/second, None if
 341                                 unknown
 342                        * fragment_index: The counter of the currently
 343                                          downloaded video fragment.
 344                        * fragment_count: The number of fragments (= individual
 345                                          files that will be merged)
 346
 347                        Progress hooks are guaranteed to be called at least once
 348                        (with status "finished") if the download is successful.
 349     merge_output_format: Extension to use when merging formats.
 350     final_ext:         Expected final extension; used to detect when the file was
 351                        already downloaded and converted. "merge_output_format" is
 352                        replaced by this extension when given
 353     fixup:             Automatically correct known faults of the file.
 354                        One of:
 355                        - "never": do nothing
 356                        - "warn": only emit a warning
 357                        - "detect_or_warn": check whether we can do anything
 358                                            about it, warn otherwise (default)
 359     source_address:    Client-side IP address to bind to.
 360     call_home:         Boolean, true iff we are allowed to contact the
 361                        yt-dlp servers for debugging. (BROKEN)
 362     sleep_interval_requests: Number of seconds to sleep between requests
 363                        during extraction
 364     sleep_interval:    Number of seconds to sleep before each download when
 365                        used alone or a lower bound of a range for randomized
 366                        sleep before each download (minimum possible number
 367                        of seconds to sleep) when used along with
 368                        max_sleep_interval.
 369     max_sleep_interval:Upper bound of a range for randomized sleep before each
 370                        download (maximum possible number of seconds to sleep).
 371                        Must only be used along with sleep_interval.
 372                        Actual sleep time will be a random float from range
 373                        [sleep_interval; max_sleep_interval].
 374     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 375     listformats:       Print an overview of available video formats and exit.
 376     list_thumbnails:   Print a table of all thumbnails and exit.
 377     match_filter:      A function that gets called with the info_dict of
 378                        every video.
 379                        If it returns a message, the video is ignored.
 380                        If it returns None, the video is downloaded.
 381                        match_filter_func in utils.py is one example for this.
 382     no_color:          Do not emit color codes in output.
 383     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 384                        HTTP header
 385     geo_bypass_country:
  386                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 387                        explicit geographic restriction bypassing via faking
 388                        X-Forwarded-For HTTP header
 389     geo_bypass_ip_block:
 390                        IP range in CIDR notation that will be used similarly to
 391                        geo_bypass_country
 392
 393     The following options determine which downloader is picked:
 394     external_downloader: A dictionary of protocol keys and the executable of the
 395                        external downloader to use for it. The allowed protocols
 396                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 397                        Set the value to 'native' to use the native downloader
 398     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 399                        or {'m3u8': 'ffmpeg'} instead.
 400                        Use the native HLS downloader instead of ffmpeg/avconv
 401                        if True, otherwise use ffmpeg/avconv if False, otherwise
 402                        use downloader suggested by extractor if None.
 403     compat_opts:       Compatibility options. See "Differences in default behavior".
 404                        The following options do not work when used through the API:
 405                        filename, abort-on-error, multistreams, no-live-chat,
 406                        no-playlist-metafiles. Refer __init__.py for their implementation
 407
 408     The following parameters are not used by YoutubeDL itself, they are used by
 409     the downloader (see yt_dlp/downloader/common.py):
 410     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 411     max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
 412     xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
 413
 414     The following options are used by the post processors:
 415     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 416                        otherwise prefer ffmpeg. (avconv support is deprecated)
 417     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 418                        to the binary or its containing directory.
 419     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 420                         and a list of additional command-line arguments for the
 421                         postprocessor/executable. The dict can also have "PP+EXE" keys
 422                         which are used when the given exe is used by the given PP.
  423                         Use 'default' as the name for arguments to be passed to all PP
 424
 425     The following options are used by the extractors:
 426     extractor_retries: Number of times to retry for known errors
 427     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 428     hls_split_discontinuity: Split HLS playlists to different formats at
 429                        discontinuities such as ad breaks (default: False)
 430     extractor_args:    A dictionary of arguments to be passed to the extractors.
 431                        See "EXTRACTOR ARGUMENTS" for details.
 432                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 433     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 434                        If True (default), DASH manifests and related
 435                        data will be downloaded and processed by extractor.
 436                        You can reduce network I/O by disabling it if you don't
 437                        care about DASH. (only for youtube)
 438     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 439                        If True (default), HLS manifests and related
 440                        data will be downloaded and processed by extractor.
 441                        You can reduce network I/O by disabling it if you don't
 442                        care about HLS. (only for youtube)
 443     """
 444
 445     _NUMERIC_FIELDS = set((
 446         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 447         'timestamp', 'upload_year', 'upload_month', 'upload_day',
 448         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 449         'average_rating', 'comment_count', 'age_limit',
 450         'start_time', 'end_time',
 451         'chapter_number', 'season_number', 'episode_number',
 452         'track_number', 'disc_number', 'release_year',
 453         'playlist_index',
 454     ))
 455
 456     params = None
 457     _ies = []
 458     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 459     _printed_messages = set()
 460     _first_webpage_request = True
 461     _download_retcode = None
 462     _num_downloads = None
 463     _playlist_level = 0
 464     _playlist_urls = set()
 465     _screen_file = None
 466
    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.

        params:    Dictionary of options (see the class docstring). It is
                   merged on top of the built-in defaults; None is treated
                   as an empty dictionary.
        auto_init: If true, print the debug header and register the default
                   InfoExtractors during construction.

        Re-raises any OSError from starting the bidi helper process whose
        errno is not ENOENT, and any IOError from reading the download
        archive whose errno is not ENOENT.
        """
        if params is None:
            params = {}
        # Per-instance state; these shadow the class-level attributes of the same name
        self._ies = []
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # The 'logtostderr' value is used as a list index: False -> stdout, True -> stderr
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        # Warn if the deprecated option `param` was given (is not None).
        # Returns True when the warning was issued, False otherwise.
        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        # The deprecated proxy option is only used as a fallback when the new one is unset
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        # Emit any warnings supplied by the caller through the 'warnings' param
        for msg in self.params.get('warnings', []):
            self.report_warning(msg)

        # 'final_ext' overrides 'merge_output_format' when both are given
        if self.params.get('final_ext'):
            if self.params.get('merge_output_format'):
                self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
            self.params['merge_output_format'] = self.params['final_ext']

        # An explicit None is treated the same as the option not being given
        if 'overwrites' in self.params and self.params['overwrites'] is None:
            del self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                # Start a helper process (bidiv, falling back to fribidi) that
                # writes to the slave end of a pseudo-terminal; its output is
                # read back through the master end via self._output_channel.
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # ENOENT only produces a warning and the workaround is
                # skipped; any other OSError is propagated.
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        """Preload the archive, if any is specified"""
        # Read the archive file `fn` line by line into self.archive.
        # Returns False if `fn` is None or the file does not exist (ENOENT),
        # True once the file has been read; other IOErrors propagate.
        def preload_download_archive(fn):
            if fn is None:
                return False
            self.write_debug('Loading archive file %r\n' % fn)
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        # self.archive must exist before the helper runs; the helper's return value is not used here
        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            # Work on a copy: 'when' and 'key' are popped, the remaining
            # entries become keyword arguments of the postprocessor class.
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
 600
 601     def warn_if_short_id(self, argv):
 602         # short YouTube ID starting with dash?
 603         idxs = [
 604             i for i, a in enumerate(argv)
 605             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 606         if idxs:
 607             correct_argv = (
 608                 ['yt-dlp']
 609                 + [a for i, a in enumerate(argv) if i not in idxs]
 610                 + ['--'] + [argv[i] for i in idxs]
 611             )
 612             self.report_warning(
 613                 'Long argument string detected. '
 614                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 615                 args_to_str(correct_argv))
 616
 617     def add_info_extractor(self, ie):
 618         """Add an InfoExtractor object to the end of the list."""
 619         self._ies.append(ie)
 620         if not isinstance(ie, type):
 621             self._ies_instances[ie.ie_key()] = ie
 622             ie.set_downloader(self)
 623
 624     def get_info_extractor(self, ie_key):
 625         """
 626         Get an instance of an IE with name ie_key, it will try to get one from
 627         the _ies list, if there's no instance it will create a new one and add
 628         it to the extractor list.
 629         """
 630         ie = self._ies_instances.get(ie_key)
 631         if ie is None:
 632             ie = get_info_extractor(ie_key)()
 633             self.add_info_extractor(ie)
 634         return ie
 635
 636     def add_default_info_extractors(self):
 637         """
 638         Add the InfoExtractors returned by gen_extractors to the end of the list
 639         """
 640         for ie in gen_extractor_classes():
 641             self.add_info_extractor(ie)
 642
 643     def add_post_processor(self, pp, when='post_process'):
 644         """Add a PostProcessor object to the end of the chain."""
 645         self._pps[when].append(pp)
 646         pp.set_downloader(self)
 647
 648     def add_post_hook(self, ph):
 649         """Add the post hook"""
 650         self._post_hooks.append(ph)
 651
 652     def add_progress_hook(self, ph):
 653         """Add the progress hook (currently only for the file downloader)"""
 654         self._progress_hooks.append(ph)
 655
 656     def _bidi_workaround(self, message):
 657         if not hasattr(self, '_output_channel'):
 658             return message
 659
 660         assert hasattr(self, '_output_process')
 661         assert isinstance(message, compat_str)
 662         line_count = message.count('\n') + 1
 663         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 664         self._output_process.stdin.flush()
 665         res = ''.join(self._output_channel.readline().decode('utf-8')
 666                       for _ in range(line_count))
 667         return res[:-len('\n')]
 668
 669     def _write_string(self, message, out=None, only_once=False):
 670         if only_once:
 671             if message in self._printed_messages:
 672                 return
 673             self._printed_messages.add(message)
 674         write_string(message, out=out, encoding=self.params.get('encoding'))
 675
 676     def to_stdout(self, message, skip_eol=False, quiet=False):
 677         """Print message to stdout"""
 678         if self.params.get('logger'):
 679             self.params['logger'].debug(message)
 680         elif not quiet or self.params.get('verbose'):
 681             self._write_string(
 682                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 683                 self._err_file if quiet else self._screen_file)
 684
 685     def to_stderr(self, message, only_once=False):
 686         """Print message to stderr"""
 687         assert isinstance(message, compat_str)
 688         if self.params.get('logger'):
 689             self.params['logger'].error(message)
 690         else:
 691             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 692
 693     def to_console_title(self, message):
 694         if not self.params.get('consoletitle', False):
 695             return
 696         if compat_os_name == 'nt':
 697             if ctypes.windll.kernel32.GetConsoleWindow():
 698                 # c_wchar_p() might not be necessary if `message` is
 699                 # already of type unicode()
 700                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 701         elif 'TERM' in os.environ:
 702             self._write_string('\033]0;%s\007' % message, self._screen_file)
 703
 704     def save_console_title(self):
 705         if not self.params.get('consoletitle', False):
 706             return
 707         if self.params.get('simulate', False):
 708             return
 709         if compat_os_name != 'nt' and 'TERM' in os.environ:
 710             # Save the title on stack
 711             self._write_string('\033[22;0t', self._screen_file)
 712
 713     def restore_console_title(self):
 714         if not self.params.get('consoletitle', False):
 715             return
 716         if self.params.get('simulate', False):
 717             return
 718         if compat_os_name != 'nt' and 'TERM' in os.environ:
 719             # Restore the title from stack
 720             self._write_string('\033[23;0t', self._screen_file)
 721
 722     def __enter__(self):
 723         self.save_console_title()
 724         return self
 725
 726     def __exit__(self, *args):
 727         self.restore_console_title()
 728
 729         if self.params.get('cookiefile') is not None:
 730             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 731
 732     def trouble(self, message=None, tb=None):
 733         """Determine action to take when a download problem appears.
 734
 735         Depending on if the downloader has been configured to ignore
 736         download errors or not, this method may throw an exception or
 737         not when errors are found, after printing the message.
 738
 739         tb, if given, is additional traceback information.
 740         """
 741         if message is not None:
 742             self.to_stderr(message)
 743         if self.params.get('verbose'):
 744             if tb is None:
 745                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 746                     tb = ''
 747                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 748                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 749                     tb += encode_compat_str(traceback.format_exc())
 750                 else:
 751                     tb_data = traceback.format_list(traceback.extract_stack())
 752                     tb = ''.join(tb_data)
 753             if tb:
 754                 self.to_stderr(tb)
 755         if not self.params.get('ignoreerrors', False):
 756             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 757                 exc_info = sys.exc_info()[1].exc_info
 758             else:
 759                 exc_info = sys.exc_info()
 760             raise DownloadError(message, exc_info)
 761         self._download_retcode = 1
 762
 763     def to_screen(self, message, skip_eol=False):
 764         """Print message to stdout if not in quiet mode"""
 765         self.to_stdout(
 766             message, skip_eol, quiet=self.params.get('quiet', False))
 767
 768     def report_warning(self, message, only_once=False):
 769         '''
 770         Print the message to stderr, it will be prefixed with 'WARNING:'
 771         If stderr is a tty file the 'WARNING:' will be colored
 772         '''
 773         if self.params.get('logger') is not None:
 774             self.params['logger'].warning(message)
 775         else:
 776             if self.params.get('no_warnings'):
 777                 return
 778             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 779                 _msg_header = '\033[0;33mWARNING:\033[0m'
 780             else:
 781                 _msg_header = 'WARNING:'
 782             warning_message = '%s %s' % (_msg_header, message)
 783             self.to_stderr(warning_message, only_once)
 784
 785     def report_error(self, message, tb=None):
 786         '''
 787         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 788         in red if stderr is a tty file.
 789         '''
 790         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 791             _msg_header = '\033[0;31mERROR:\033[0m'
 792         else:
 793             _msg_header = 'ERROR:'
 794         error_message = '%s %s' % (_msg_header, message)
 795         self.trouble(error_message, tb)
 796
 797     def write_debug(self, message, only_once=False):
 798         '''Log debug message or Print message to stderr'''
 799         if not self.params.get('verbose', False):
 800             return
 801         message = '[debug] %s' % message
 802         if self.params.get('logger'):
 803             self.params['logger'].debug(message)
 804         else:
 805             self.to_stderr(message, only_once)
 806
 807     def report_file_already_downloaded(self, file_name):
 808         """Report file has already been fully downloaded."""
 809         try:
 810             self.to_screen('[download] %s has already been downloaded' % file_name)
 811         except UnicodeEncodeError:
 812             self.to_screen('[download] The file has already been downloaded')
 813
 814     def report_file_delete(self, file_name):
 815         """Report that existing file will be deleted."""
 816         try:
 817             self.to_screen('Deleting existing file %s' % file_name)
 818         except UnicodeEncodeError:
 819             self.to_screen('Deleting existing file')
 820
 821     def parse_outtmpl(self):
 822         outtmpl_dict = self.params.get('outtmpl', {})
 823         if not isinstance(outtmpl_dict, dict):
 824             outtmpl_dict = {'default': outtmpl_dict}
 825         outtmpl_dict.update({
 826             k: v for k, v in DEFAULT_OUTTMPL.items()
 827             if not outtmpl_dict.get(k)})
 828         for key, val in outtmpl_dict.items():
 829             if isinstance(val, bytes):
 830                 self.report_warning(
 831                     'Parameter outtmpl is bytes, but should be a unicode string. '
 832                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 833         return outtmpl_dict
 834
 835     def get_output_path(self, dir_type='', filename=None):
 836         paths = self.params.get('paths', {})
 837         assert isinstance(paths, dict)
 838         path = os.path.join(
 839             expand_path(paths.get('home', '').strip()),
 840             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 841             filename or '')
 842
 843         # Temporary fix for #4787
 844         # 'Treat' all problem characters by passing filename through preferredencoding
 845         # to workaround encoding issues with subprocess on python2 @ Windows
 846         if sys.version_info < (3, 0) and sys.platform == 'win32':
 847             path = encodeFilename(path, True).decode(preferredencoding())
 848         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 849
 850     @staticmethod
 851     def _outtmpl_expandpath(outtmpl):
 852         # expand_path translates '%%' into '%' and '$$' into '$'
 853         # correspondingly that is not what we want since we need to keep
 854         # '%%' intact for template dict substitution step. Working around
 855         # with boundary-alike separator hack.
 856         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 857         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 858
 859         # outtmpl should be expand_path'ed before template dict substitution
 860         # because meta fields may contain env variables we don't want to
 861         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 862         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 863         return expand_path(outtmpl).replace(sep, '')
 864
 865     @staticmethod
 866     def escape_outtmpl(outtmpl):
 867         ''' Escape any remaining strings like %s, %abc% etc. '''
 868         return re.sub(
 869             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 870             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 871             outtmpl)
 872
 873     @classmethod
 874     def validate_outtmpl(cls, outtmpl):
 875         ''' @return None or Exception object '''
 876         outtmpl = re.sub(
 877             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'),
 878             lambda mobj: f'{mobj.group(0)[:-1]}s',
 879             cls._outtmpl_expandpath(outtmpl))
 880         try:
 881             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
 882             return None
 883         except ValueError as err:
 884             return err
 885
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the template and info_dict suitable for substitution : ydl.escape_outtmpl(outtmpl) % info_dict

        Every keyed conversion in outtmpl is rewritten to reference a generated
        key, and the value computed for it is stored under that key.

        @param outtmpl    the output template string
        @param info_dict  the info dict; it is shallow-copied, not modified
        @param sanitize   optional callable(field_name, value) applied to values
                          that end up with a 'c', 's' or 'r' conversion
        @return           tuple (rewritten template, dict of generated key -> value)
        """
        # Work on a shallow copy so that the fields added below do not leak
        # into the caller's dict
        info_dict = dict(info_dict)
        # Placeholder used when a field has no value and no explicit default
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['epoch'] = int(time.time())
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Filled by create_key(): generated key -> value to substitute
        TMPL_DICT = {}
        # Matches the conversions of the outer template; besides STR_FORMAT_TYPES
        # the custom conversion characters l, j and q are accepted
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]'))
        # Arithmetic supported inside a key; operands are converted with float_or_none
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the number part below is not escaped, so it
        # matches any character, not only a decimal point - confirm intent
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Grammar of a key: [-]fields[(+|-)operand...][>strf_format][|default]
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        # Looks up a dotted field path in info_dict
        get_key = lambda k: traverse_obj(
            info_dict, k.split('.'), is_user_input=True, traverse_string=True)

        def get_value(mdict):
            """Compute the value for one parsed key (groupdict of INTERNAL_FORMAT_RE)."""
            # Object traversal
            value = get_key(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                # The maths string is consumed left to right, alternating
                # between an operator and an operand; `operator` holds the
                # pending function while the next operand is being read
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    # A leading '-' on the operand is applied as a multiplier
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a literal number or another field
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(get_key(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        # e.g. value or offset is None
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'])

            return value

        def create_key(outer_mobj):
            """Replacement callback for EXTERNAL_FORMAT_RE: store the value in
            TMPL_DICT and return the conversion rewritten to use the new key."""
            if not outer_mobj.group('has_key'):
                # Conversions without a key get one more '%' prepended
                return f'%{outer_mobj.group(0)}'

            prefix = outer_mobj.group('prefix')
            key = outer_mobj.group('key')
            original_fmt = fmt = outer_mobj.group('format')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            if mobj is None:
                # Key does not follow the grammar: use the placeholder
                value, default, mobj = None, na, {'fields': ''}
            else:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else na
                value = get_value(mobj)

            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            # Same flags/width as fmt, but with a plain 's' conversion
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':
                value, fmt = ', '.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':
                value, fmt = json.dumps(value), str_fmt
            elif fmt[-1] == 'q':
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'c':
                value = str(value)
                # NOTE(review): str() never returns None, so this branch looks
                # unreachable; an empty string would raise IndexError below
                if value is None:
                    value, fmt = default, 's'
                else:
                    value = value[0]
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    # sanitize receives the last component of the field path
                    value = sanitize(mobj['fields'].split('.')[-1], value)

            # The generated key is the original key (with '%' followed by NUL)
            # plus NUL and the original conversion
            key = '%s\0%s' % (key.replace('%', '%\0'), original_fmt)
            TMPL_DICT[key] = value
            return f'{prefix}%({key}){fmt}'

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1016
1017     def _prepare_filename(self, info_dict, tmpl_type='default'):
1018         try:
1019             sanitize = lambda k, v: sanitize_filename(
1020                 compat_str(v),
1021                 restricted=self.params.get('restrictfilenames'),
1022                 is_id=(k == 'id' or k.endswith('_id')))
1023             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
1024             outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
1025             outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
1026             filename = outtmpl % template_dict
1027
1028             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1029             if force_ext is not None:
1030                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1031
1032             # https://github.com/blackjack4494/youtube-dlc/issues/85
1033             trim_file_name = self.params.get('trim_file_name', False)
1034             if trim_file_name:
1035                 fn_groups = filename.rsplit('.')
1036                 ext = fn_groups[-1]
1037                 sub_ext = ''
1038                 if len(fn_groups) > 2:
1039                     sub_ext = fn_groups[-2]
1040                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1041
1042             return filename
1043         except ValueError as err:
1044             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1045             return None
1046
1047     def prepare_filename(self, info_dict, dir_type='', warn=False):
1048         """Generate the output filename."""
1049
1050         filename = self._prepare_filename(info_dict, dir_type or 'default')
1051
1052         if warn:
1053             if not self.params.get('paths'):
1054                 pass
1055             elif filename == '-':
1056                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1057             elif os.path.isabs(filename):
1058                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1059             self.__prepare_filename_warned = True
1060         if filename == '-' or not filename:
1061             return filename
1062
1063         return self.get_output_path(dir_type, filename)
1064
1065     def _match_entry(self, info_dict, incomplete=False, silent=False):
1066         """ Returns None if the file should be downloaded """
1067
1068         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1069
1070         def check_filter():
1071             if 'title' in info_dict:
1072                 # This can happen when we're just evaluating the playlist
1073                 title = info_dict['title']
1074                 matchtitle = self.params.get('matchtitle', False)
1075                 if matchtitle:
1076                     if not re.search(matchtitle, title, re.IGNORECASE):
1077                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1078                 rejecttitle = self.params.get('rejecttitle', False)
1079                 if rejecttitle:
1080                     if re.search(rejecttitle, title, re.IGNORECASE):
1081                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1082             date = info_dict.get('upload_date')
1083             if date is not None:
1084                 dateRange = self.params.get('daterange', DateRange())
1085                 if date not in dateRange:
1086                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1087             view_count = info_dict.get('view_count')
1088             if view_count is not None:
1089                 min_views = self.params.get('min_views')
1090                 if min_views is not None and view_count < min_views:
1091                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1092                 max_views = self.params.get('max_views')
1093                 if max_views is not None and view_count > max_views:
1094                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1095             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1096                 return 'Skipping "%s" because it is age restricted' % video_title
1097
1098             if not incomplete:
1099                 match_filter = self.params.get('match_filter')
1100                 if match_filter is not None:
1101                     ret = match_filter(info_dict)
1102                     if ret is not None:
1103                         return ret
1104             return None
1105
1106         if self.in_download_archive(info_dict):
1107             reason = '%s has already been recorded in the archive' % video_title
1108             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1109         else:
1110             reason = check_filter()
1111             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1112         if reason is not None:
1113             if not silent:
1114                 self.to_screen('[download] ' + reason)
1115             if self.params.get(break_opt, False):
1116                 raise break_err()
1117         return reason
1118
1119     @staticmethod
1120     def add_extra_info(info_dict, extra_info):
1121         '''Set the keys from extra_info in info dict if they are missing'''
1122         for key, value in extra_info.items():
1123             info_dict.setdefault(key, value)
1124
1125     def extract_info(self, url, download=True, ie_key=None, extra_info={},
1126                      process=True, force_generic_extractor=False):
1127         """
1128         Return a list with a dictionary for each video extracted.
1129
1130         Arguments:
1131         url -- URL to extract
1132
1133         Keyword arguments:
1134         download -- whether to download videos during extraction
1135         ie_key -- extractor key hint
1136         extra_info -- dictionary containing the extra values to add to each result
1137         process -- whether to resolve all unresolved references (URLs, playlist items),
1138             must be True for download to work.
1139         force_generic_extractor -- force using the generic extractor
1140         """
1141
1142         if not ie_key and force_generic_extractor:
1143             ie_key = 'Generic'
1144
1145         if ie_key:
1146             ies = [self.get_info_extractor(ie_key)]
1147         else:
1148             ies = self._ies
1149
1150         for ie in ies:
1151             if not ie.suitable(url):
1152                 continue
1153
1154             ie_key = ie.ie_key()
1155             ie = self.get_info_extractor(ie_key)
1156             if not ie.working():
1157                 self.report_warning('The program functionality for this site has been marked as broken, '
1158                                     'and will probably not work.')
1159
1160             try:
1161                 temp_id = str_or_none(
1162                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1163                     else ie._match_id(url))
1164             except (AssertionError, IndexError, AttributeError):
1165                 temp_id = None
1166             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1167                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1168                                ie_key, temp_id))
1169                 break
1170             return self.__extract_info(url, ie, download, extra_info, process)
1171         else:
1172             self.report_error('no suitable InfoExtractor for URL %s' % url)
1173
1174     def __handle_extraction_exceptions(func, handle_all_errors=True):
1175         def wrapper(self, *args, **kwargs):
1176             try:
1177                 return func(self, *args, **kwargs)
1178             except GeoRestrictedError as e:
1179                 msg = e.msg
1180                 if e.countries:
1181                     msg += '\nThis video is available in %s.' % ', '.join(
1182                         map(ISO3166Utils.short2full, e.countries))
1183                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1184                 self.report_error(msg)
1185             except ExtractorError as e:  # An error we somewhat expected
1186                 self.report_error(compat_str(e), e.format_traceback())
1187             except ThrottledDownload:
1188                 self.to_stderr('\r')
1189                 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1190                 return wrapper(self, *args, **kwargs)
1191             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1192                 raise
1193             except Exception as e:
1194                 if handle_all_errors and self.params.get('ignoreerrors', False):
1195                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1196                 else:
1197                     raise
1198         return wrapper
1199
1200     @__handle_extraction_exceptions
1201     def __extract_info(self, url, ie, download, extra_info, process):
1202         ie_result = ie.extract(url)
1203         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1204             return
1205         if isinstance(ie_result, list):
1206             # Backwards compatibility: old IE result format
1207             ie_result = {
1208                 '_type': 'compat_list',
1209                 'entries': ie_result,
1210             }
1211         if extra_info.get('original_url'):
1212             ie_result.setdefault('original_url', extra_info['original_url'])
1213         self.add_default_extra_info(ie_result, ie, url)
1214         if process:
1215             return self.process_ie_result(ie_result, download, extra_info)
1216         else:
1217             return ie_result
1218
1219     def add_default_extra_info(self, ie_result, ie, url):
1220         if url is not None:
1221             self.add_extra_info(ie_result, {
1222                 'webpage_url': url,
1223                 'original_url': url,
1224                 'webpage_url_basename': url_basename(url),
1225             })
1226         if ie is not None:
1227             self.add_extra_info(ie_result, {
1228                 'extractor': ie.IE_NAME,
1229                 'extractor_key': ie.ie_key(),
1230             })
1231
1232     def process_ie_result(self, ie_result, download=True, extra_info={}):
1233         """
1234         Take the result of the ie(may be modified) and resolve all unresolved
1235         references (URLs, playlist items).
1236
1237         It will also download the videos if 'download'.
1238         Returns the resolved ie_result.
1239         """
1240         result_type = ie_result.get('_type', 'video')
1241
1242         if result_type in ('url', 'url_transparent'):
1243             ie_result['url'] = sanitize_url(ie_result['url'])
1244             if ie_result.get('original_url'):
1245                 extra_info.setdefault('original_url', ie_result['original_url'])
1246
1247             extract_flat = self.params.get('extract_flat', False)
1248             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1249                     or extract_flat is True):
1250                 info_copy = ie_result.copy()
1251                 self.add_extra_info(info_copy, extra_info)
1252                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1253                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1254                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1255                 return ie_result
1256
1257         if result_type == 'video':
1258             self.add_extra_info(ie_result, extra_info)
1259             ie_result = self.process_video_result(ie_result, download=download)
1260             additional_urls = (ie_result or {}).get('additional_urls')
1261             if additional_urls:
1262                 # TODO: Improve MetadataFromFieldPP to allow setting a list
1263                 if isinstance(additional_urls, compat_str):
1264                     additional_urls = [additional_urls]
1265                 self.to_screen(
1266                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1267                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1268                 ie_result['additional_entries'] = [
1269                     self.extract_info(
1270                         url, download, extra_info,
1271                         force_generic_extractor=self.params.get('force_generic_extractor'))
1272                     for url in additional_urls
1273                 ]
1274             return ie_result
1275         elif result_type == 'url':
1276             # We have to add extra_info to the results because it may be
1277             # contained in a playlist
1278             return self.extract_info(
1279                 ie_result['url'], download,
1280                 ie_key=ie_result.get('ie_key'),
1281                 extra_info=extra_info)
1282         elif result_type == 'url_transparent':
1283             # Use the information from the embedding page
1284             info = self.extract_info(
1285                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1286                 extra_info=extra_info, download=False, process=False)
1287
1288             # extract_info may return None when ignoreerrors is enabled and
1289             # extraction failed with an error, don't crash and return early
1290             # in this case
1291             if not info:
1292                 return info
1293
1294             force_properties = dict(
1295                 (k, v) for k, v in ie_result.items() if v is not None)
1296             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1297                 if f in force_properties:
1298                     del force_properties[f]
1299             new_result = info.copy()
1300             new_result.update(force_properties)
1301
1302             # Extracted info may not be a video result (i.e.
1303             # info.get('_type', 'video') != video) but rather an url or
1304             # url_transparent. In such cases outer metadata (from ie_result)
1305             # should be propagated to inner one (info). For this to happen
1306             # _type of info should be overridden with url_transparent. This
1307             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1308             if new_result.get('_type') == 'url':
1309                 new_result['_type'] = 'url_transparent'
1310
1311             return self.process_ie_result(
1312                 new_result, download=download, extra_info=extra_info)
1313         elif result_type in ('playlist', 'multi_video'):
1314             # Protect from infinite recursion due to recursively nested playlists
1315             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1316             webpage_url = ie_result['webpage_url']
1317             if webpage_url in self._playlist_urls:
1318                 self.to_screen(
1319                     '[download] Skipping already downloaded playlist: %s'
1320                     % ie_result.get('title') or ie_result.get('id'))
1321                 return
1322
1323             self._playlist_level += 1
1324             self._playlist_urls.add(webpage_url)
1325             self._sanitize_thumbnails(ie_result)
1326             try:
1327                 return self.__process_playlist(ie_result, download)
1328             finally:
1329                 self._playlist_level -= 1
1330                 if not self._playlist_level:
1331                     self._playlist_urls.clear()
1332         elif result_type == 'compat_list':
1333             self.report_warning(
1334                 'Extractor %s returned a compat_list result. '
1335                 'It needs to be updated.' % ie_result.get('extractor'))
1336
1337             def _fixup(r):
1338                 self.add_extra_info(
1339                     r,
1340                     {
1341                         'extractor': ie_result['extractor'],
1342                         'webpage_url': ie_result['webpage_url'],
1343                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1344                         'extractor_key': ie_result['extractor_key'],
1345                     }
1346                 )
1347                 return r
1348             ie_result['entries'] = [
1349                 self.process_ie_result(_fixup(r), download, extra_info)
1350                 for r in ie_result['entries']
1351             ]
1352             return ie_result
1353         else:
1354             raise Exception('Invalid result type: %s' % result_type)
1355
1356     def _ensure_dir_exists(self, path):
1357         return make_dir(path, self.report_error)
1358
1359     def __process_playlist(self, ie_result, download):
1360         # We process each entry in the playlist
1361         playlist = ie_result.get('title') or ie_result.get('id')
1362         self.to_screen('[download] Downloading playlist: %s' % playlist)
1363
1364         if 'entries' not in ie_result:
1365             raise EntryNotInPlaylist()
1366         incomplete_entries = bool(ie_result.get('requested_entries'))
1367         if incomplete_entries:
1368             def fill_missing_entries(entries, indexes):
1369                 ret = [None] * max(*indexes)
1370                 for i, entry in zip(indexes, entries):
1371                     ret[i - 1] = entry
1372                 return ret
1373             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1374
1375         playlist_results = []
1376
1377         playliststart = self.params.get('playliststart', 1)
1378         playlistend = self.params.get('playlistend')
1379         # For backwards compatibility, interpret -1 as whole list
1380         if playlistend == -1:
1381             playlistend = None
1382
1383         playlistitems_str = self.params.get('playlist_items')
1384         playlistitems = None
1385         if playlistitems_str is not None:
1386             def iter_playlistitems(format):
1387                 for string_segment in format.split(','):
1388                     if '-' in string_segment:
1389                         start, end = string_segment.split('-')
1390                         for item in range(int(start), int(end) + 1):
1391                             yield int(item)
1392                     else:
1393                         yield int(string_segment)
1394             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1395
1396         ie_entries = ie_result['entries']
1397         msg = (
1398             'Downloading %d videos' if not isinstance(ie_entries, list)
1399             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1400         if not isinstance(ie_entries, (list, PagedList)):
1401             ie_entries = LazyList(ie_entries)
1402
1403         def get_entry(i):
1404             return YoutubeDL.__handle_extraction_exceptions(
1405                 lambda self, i: ie_entries[i - 1],
1406                 False
1407             )(self, i)
1408
1409         entries = []
1410         for i in playlistitems or itertools.count(playliststart):
1411             if playlistitems is None and playlistend is not None and playlistend < i:
1412                 break
1413             entry = None
1414             try:
1415                 entry = get_entry(i)
1416                 if entry is None:
1417                     raise EntryNotInPlaylist()
1418             except (IndexError, EntryNotInPlaylist):
1419                 if incomplete_entries:
1420                     raise EntryNotInPlaylist()
1421                 elif not playlistitems:
1422                     break
1423             entries.append(entry)
1424             try:
1425                 if entry is not None:
1426                     self._match_entry(entry, incomplete=True, silent=True)
1427             except (ExistingVideoReached, RejectedVideoReached):
1428                 break
1429         ie_result['entries'] = entries
1430
1431         # Save playlist_index before re-ordering
1432         entries = [
1433             ((playlistitems[i - 1] if playlistitems else i), entry)
1434             for i, entry in enumerate(entries, 1)
1435             if entry is not None]
1436         n_entries = len(entries)
1437
1438         if not playlistitems and (playliststart or playlistend):
1439             playlistitems = list(range(playliststart, playliststart + n_entries))
1440         ie_result['requested_entries'] = playlistitems
1441
1442         if self.params.get('allow_playlist_files', True):
1443             ie_copy = {
1444                 'playlist': playlist,
1445                 'playlist_id': ie_result.get('id'),
1446                 'playlist_title': ie_result.get('title'),
1447                 'playlist_uploader': ie_result.get('uploader'),
1448                 'playlist_uploader_id': ie_result.get('uploader_id'),
1449                 'playlist_index': 0,
1450             }
1451             ie_copy.update(dict(ie_result))
1452
1453             if self.params.get('writeinfojson', False):
1454                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1455                 if not self._ensure_dir_exists(encodeFilename(infofn)):
1456                     return
1457                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1458                     self.to_screen('[info] Playlist metadata is already present')
1459                 else:
1460                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1461                     try:
1462                         write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1463                     except (OSError, IOError):
1464                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1465
1466             # TODO: This should be passed to ThumbnailsConvertor if necessary
1467             self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1468
1469             if self.params.get('writedescription', False):
1470                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1471                 if not self._ensure_dir_exists(encodeFilename(descfn)):
1472                     return
1473                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1474                     self.to_screen('[info] Playlist description is already present')
1475                 elif ie_result.get('description') is None:
1476                     self.report_warning('There\'s no playlist description to write.')
1477                 else:
1478                     try:
1479                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1480                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1481                             descfile.write(ie_result['description'])
1482                     except (OSError, IOError):
1483                         self.report_error('Cannot write playlist description file ' + descfn)
1484                         return
1485
1486         if self.params.get('playlistreverse', False):
1487             entries = entries[::-1]
1488         if self.params.get('playlistrandom', False):
1489             random.shuffle(entries)
1490
1491         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1492
1493         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1494         failures = 0
1495         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1496         for i, entry_tuple in enumerate(entries, 1):
1497             playlist_index, entry = entry_tuple
1498             if 'playlist_index' in self.params.get('compat_options', []):
1499                 playlist_index = playlistitems[i - 1] if playlistitems else i
1500             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1501             # This __x_forwarded_for_ip thing is a bit ugly but requires
1502             # minimal changes
1503             if x_forwarded_for:
1504                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1505             extra = {
1506                 'n_entries': n_entries,
1507                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1508                 'playlist_index': playlist_index,
1509                 'playlist_autonumber': i,
1510                 'playlist': playlist,
1511                 'playlist_id': ie_result.get('id'),
1512                 'playlist_title': ie_result.get('title'),
1513                 'playlist_uploader': ie_result.get('uploader'),
1514                 'playlist_uploader_id': ie_result.get('uploader_id'),
1515                 'extractor': ie_result['extractor'],
1516                 'webpage_url': ie_result['webpage_url'],
1517                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1518                 'extractor_key': ie_result['extractor_key'],
1519             }
1520
1521             if self._match_entry(entry, incomplete=True) is not None:
1522                 continue
1523
1524             entry_result = self.__process_iterable_entry(entry, download, extra)
1525             if not entry_result:
1526                 failures += 1
1527             if failures >= max_failures:
1528                 self.report_error(
1529                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1530                 break
1531             # TODO: skip failed (empty) entries?
1532             playlist_results.append(entry_result)
1533         ie_result['entries'] = playlist_results
1534         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1535         return ie_result
1536
1537     @__handle_extraction_exceptions
1538     def __process_iterable_entry(self, entry, download, extra_info):
1539         return self.process_ie_result(
1540             entry, download=download, extra_info=extra_info)
1541
1542     def _build_format_filter(self, filter_spec):
1543         " Returns a function to filter the formats according to the filter_spec "
1544
1545         OPERATORS = {
1546             '<': operator.lt,
1547             '<=': operator.le,
1548             '>': operator.gt,
1549             '>=': operator.ge,
1550             '=': operator.eq,
1551             '!=': operator.ne,
1552         }
1553         operator_rex = re.compile(r'''(?x)\s*
1554             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1555             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1556             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1557             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1558         m = operator_rex.fullmatch(filter_spec)
1559         if m:
1560             try:
1561                 comparison_value = int(m.group('value'))
1562             except ValueError:
1563                 comparison_value = parse_filesize(m.group('value'))
1564                 if comparison_value is None:
1565                     comparison_value = parse_filesize(m.group('value') + 'B')
1566                 if comparison_value is None:
1567                     raise ValueError(
1568                         'Invalid value %r in format specification %r' % (
1569                             m.group('value'), filter_spec))
1570             op = OPERATORS[m.group('op')]
1571
1572         if not m:
1573             STR_OPERATORS = {
1574                 '=': operator.eq,
1575                 '^=': lambda attr, value: attr.startswith(value),
1576                 '$=': lambda attr, value: attr.endswith(value),
1577                 '*=': lambda attr, value: value in attr,
1578             }
1579             str_operator_rex = re.compile(r'''(?x)\s*
1580                 (?P<key>[a-zA-Z0-9._-]+)\s*
1581                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1582                 (?P<value>[a-zA-Z0-9._-]+)\s*
1583                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1584             m = str_operator_rex.fullmatch(filter_spec)
1585             if m:
1586                 comparison_value = m.group('value')
1587                 str_op = STR_OPERATORS[m.group('op')]
1588                 if m.group('negation'):
1589                     op = lambda attr, value: not str_op(attr, value)
1590                 else:
1591                     op = str_op
1592
1593         if not m:
1594             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1595
1596         def _filter(f):
1597             actual_value = f.get(m.group('key'))
1598             if actual_value is None:
1599                 return m.group('none_inclusive')
1600             return op(actual_value, comparison_value)
1601         return _filter
1602
1603     def _default_format_spec(self, info_dict, download=True):
1604
1605         def can_merge():
1606             merger = FFmpegMergerPP(self)
1607             return merger.available and merger.can_merge()
1608
1609         prefer_best = (
1610             not self.params.get('simulate', False)
1611             and download
1612             and (
1613                 not can_merge()
1614                 or info_dict.get('is_live', False)
1615                 or self.outtmpl_dict['default'] == '-'))
1616         compat = (
1617             prefer_best
1618             or self.params.get('allow_multiple_audio_streams', False)
1619             or 'format-spec' in self.params.get('compat_opts', []))
1620
1621         return (
1622             'best/bestvideo+bestaudio' if prefer_best
1623             else 'bestvideo*+bestaudio/best' if not compat
1624             else 'bestvideo+bestaudio/best')
1625
1626     def build_format_selector(self, format_spec):
1627         def syntax_error(note, start):
1628             message = (
1629                 'Invalid format specification: '
1630                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1631             return SyntaxError(message)
1632
1633         PICKFIRST = 'PICKFIRST'
1634         MERGE = 'MERGE'
1635         SINGLE = 'SINGLE'
1636         GROUP = 'GROUP'
1637         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1638
1639         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1640                                   'video': self.params.get('allow_multiple_video_streams', False)}
1641
1642         check_formats = self.params.get('check_formats')
1643
1644         def _parse_filter(tokens):
1645             filter_parts = []
1646             for type, string, start, _, _ in tokens:
1647                 if type == tokenize.OP and string == ']':
1648                     return ''.join(filter_parts)
1649                 else:
1650                     filter_parts.append(string)
1651
1652         def _remove_unused_ops(tokens):
1653             # Remove operators that we don't use and join them with the surrounding strings
1654             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1655             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1656             last_string, last_start, last_end, last_line = None, None, None, None
1657             for type, string, start, end, line in tokens:
1658                 if type == tokenize.OP and string == '[':
1659                     if last_string:
1660                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1661                         last_string = None
1662                     yield type, string, start, end, line
1663                     # everything inside brackets will be handled by _parse_filter
1664                     for type, string, start, end, line in tokens:
1665                         yield type, string, start, end, line
1666                         if type == tokenize.OP and string == ']':
1667                             break
1668                 elif type == tokenize.OP and string in ALLOWED_OPS:
1669                     if last_string:
1670                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1671                         last_string = None
1672                     yield type, string, start, end, line
1673                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1674                     if not last_string:
1675                         last_string = string
1676                         last_start = start
1677                         last_end = end
1678                     else:
1679                         last_string += string
1680             if last_string:
1681                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1682
1683         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1684             selectors = []
1685             current_selector = None
1686             for type, string, start, _, _ in tokens:
1687                 # ENCODING is only defined in python 3.x
1688                 if type == getattr(tokenize, 'ENCODING', None):
1689                     continue
1690                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1691                     current_selector = FormatSelector(SINGLE, string, [])
1692                 elif type == tokenize.OP:
1693                     if string == ')':
1694                         if not inside_group:
1695                             # ')' will be handled by the parentheses group
1696                             tokens.restore_last_token()
1697                         break
1698                     elif inside_merge and string in ['/', ',']:
1699                         tokens.restore_last_token()
1700                         break
1701                     elif inside_choice and string == ',':
1702                         tokens.restore_last_token()
1703                         break
1704                     elif string == ',':
1705                         if not current_selector:
1706                             raise syntax_error('"," must follow a format selector', start)
1707                         selectors.append(current_selector)
1708                         current_selector = None
1709                     elif string == '/':
1710                         if not current_selector:
1711                             raise syntax_error('"/" must follow a format selector', start)
1712                         first_choice = current_selector
1713                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1714                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1715                     elif string == '[':
1716                         if not current_selector:
1717                             current_selector = FormatSelector(SINGLE, 'best', [])
1718                         format_filter = _parse_filter(tokens)
1719                         current_selector.filters.append(format_filter)
1720                     elif string == '(':
1721                         if current_selector:
1722                             raise syntax_error('Unexpected "("', start)
1723                         group = _parse_format_selection(tokens, inside_group=True)
1724                         current_selector = FormatSelector(GROUP, group, [])
1725                     elif string == '+':
1726                         if not current_selector:
1727                             raise syntax_error('Unexpected "+"', start)
1728                         selector_1 = current_selector
1729                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1730                         if not selector_2:
1731                             raise syntax_error('Expected a selector', start)
1732                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1733                     else:
1734                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1735                 elif type == tokenize.ENDMARKER:
1736                     break
1737             if current_selector:
1738                 selectors.append(current_selector)
1739             return selectors
1740
1741         def _merge(formats_pair):
1742             format_1, format_2 = formats_pair
1743
1744             formats_info = []
1745             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1746             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1747
1748             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1749                 get_no_more = {'video': False, 'audio': False}
1750                 for (i, fmt_info) in enumerate(formats_info):
1751                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1752                         formats_info.pop(i)
1753                         continue
1754                     for aud_vid in ['audio', 'video']:
1755                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1756                             if get_no_more[aud_vid]:
1757                                 formats_info.pop(i)
1758                             get_no_more[aud_vid] = True
1759
1760             if len(formats_info) == 1:
1761                 return formats_info[0]
1762
1763             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1764             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1765
1766             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1767             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1768
1769             output_ext = self.params.get('merge_output_format')
1770             if not output_ext:
1771                 if the_only_video:
1772                     output_ext = the_only_video['ext']
1773                 elif the_only_audio and not video_fmts:
1774                     output_ext = the_only_audio['ext']
1775                 else:
1776                     output_ext = 'mkv'
1777
1778             new_dict = {
1779                 'requested_formats': formats_info,
1780                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1781                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1782                 'ext': output_ext,
1783             }
1784
1785             if the_only_video:
1786                 new_dict.update({
1787                     'width': the_only_video.get('width'),
1788                     'height': the_only_video.get('height'),
1789                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1790                     'fps': the_only_video.get('fps'),
1791                     'vcodec': the_only_video.get('vcodec'),
1792                     'vbr': the_only_video.get('vbr'),
1793                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1794                 })
1795
1796             if the_only_audio:
1797                 new_dict.update({
1798                     'acodec': the_only_audio.get('acodec'),
1799                     'abr': the_only_audio.get('abr'),
1800                 })
1801
1802             return new_dict
1803
1804         def _check_formats(formats):
1805             if not check_formats:
1806                 yield from formats
1807                 return
1808             for f in formats:
1809                 self.to_screen('[info] Testing format %s' % f['format_id'])
1810                 temp_file = tempfile.NamedTemporaryFile(
1811                     suffix='.tmp', delete=False,
1812                     dir=self.get_output_path('temp') or None)
1813                 temp_file.close()
1814                 try:
1815                     success, _ = self.dl(temp_file.name, f, test=True)
1816                 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1817                     success = False
1818                 finally:
1819                     if os.path.exists(temp_file.name):
1820                         try:
1821                             os.remove(temp_file.name)
1822                         except OSError:
1823                             self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1824                 if success:
1825                     yield f
1826                 else:
1827                     self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1828
1829         def _build_selector_function(selector):
1830             if isinstance(selector, list):  # ,
1831                 fs = [_build_selector_function(s) for s in selector]
1832
1833                 def selector_function(ctx):
1834                     for f in fs:
1835                         yield from f(ctx)
1836                 return selector_function
1837
1838             elif selector.type == GROUP:  # ()
1839                 selector_function = _build_selector_function(selector.selector)
1840
1841             elif selector.type == PICKFIRST:  # /
1842                 fs = [_build_selector_function(s) for s in selector.selector]
1843
1844                 def selector_function(ctx):
1845                     for f in fs:
1846                         picked_formats = list(f(ctx))
1847                         if picked_formats:
1848                             return picked_formats
1849                     return []
1850
1851             elif selector.type == MERGE:  # +
1852                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1853
1854                 def selector_function(ctx):
1855                     for pair in itertools.product(
1856                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1857                         yield _merge(pair)
1858
1859             elif selector.type == SINGLE:  # atom
1860                 format_spec = selector.selector or 'best'
1861
1862                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1863                 if format_spec == 'all':
1864                     def selector_function(ctx):
1865                         yield from _check_formats(ctx['formats'])
1866                 elif format_spec == 'mergeall':
1867                     def selector_function(ctx):
1868                         formats = list(_check_formats(ctx['formats']))
1869                         if not formats:
1870                             return
1871                         merged_format = formats[-1]
1872                         for f in formats[-2::-1]:
1873                             merged_format = _merge((merged_format, f))
1874                         yield merged_format
1875
1876                 else:
1877                     format_fallback, format_reverse, format_idx = False, True, 1
1878                     mobj = re.match(
1879                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1880                         format_spec)
1881                     if mobj is not None:
1882                         format_idx = int_or_none(mobj.group('n'), default=1)
1883                         format_reverse = mobj.group('bw')[0] == 'b'
1884                         format_type = (mobj.group('type') or [None])[0]
1885                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1886                         format_modified = mobj.group('mod') is not None
1887
1888                         format_fallback = not format_type and not format_modified  # for b, w
1889                         _filter_f = (
1890                             (lambda f: f.get('%scodec' % format_type) != 'none')
1891                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1892                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1893                             if format_type  # bv, ba, wv, wa
1894                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1895                             if not format_modified  # b, w
1896                             else lambda f: True)  # b*, w*
1897                         filter_f = lambda f: _filter_f(f) and (
1898                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
1899                     else:
1900                         filter_f = ((lambda f: f.get('ext') == format_spec)
1901                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1902                                     else (lambda f: f.get('format_id') == format_spec))  # id
1903
1904                     def selector_function(ctx):
1905                         formats = list(ctx['formats'])
1906                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1907                         if format_fallback and ctx['incomplete_formats'] and not matches:
1908                             # for extractors with incomplete formats (audio only (soundcloud)
1909                             # or video only (imgur)) best/worst will fallback to
1910                             # best/worst {video,audio}-only format
1911                             matches = formats
1912                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
1913                         try:
1914                             yield matches[format_idx - 1]
1915                         except IndexError:
1916                             return
1917
1918             filters = [self._build_format_filter(f) for f in selector.filters]
1919
1920             def final_selector(ctx):
1921                 ctx_copy = copy.deepcopy(ctx)
1922                 for _filter in filters:
1923                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1924                 return selector_function(ctx_copy)
1925             return final_selector
1926
1927         stream = io.BytesIO(format_spec.encode('utf-8'))
1928         try:
1929             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1930         except tokenize.TokenError:
1931             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1932
1933         class TokenIterator(object):
1934             def __init__(self, tokens):
1935                 self.tokens = tokens
1936                 self.counter = 0
1937
1938             def __iter__(self):
1939                 return self
1940
1941             def __next__(self):
1942                 if self.counter >= len(self.tokens):
1943                     raise StopIteration()
1944                 value = self.tokens[self.counter]
1945                 self.counter += 1
1946                 return value
1947
1948             next = __next__
1949
1950             def restore_last_token(self):
1951                 self.counter -= 1
1952
1953         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1954         return _build_selector_function(parsed_selector)
1955
1956     def _calc_headers(self, info_dict):
1957         res = std_headers.copy()
1958
1959         add_headers = info_dict.get('http_headers')
1960         if add_headers:
1961             res.update(add_headers)
1962
1963         cookies = self._calc_cookies(info_dict)
1964         if cookies:
1965             res['Cookie'] = cookies
1966
1967         if 'X-Forwarded-For' not in res:
1968             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1969             if x_forwarded_for_ip:
1970                 res['X-Forwarded-For'] = x_forwarded_for_ip
1971
1972         return res
1973
1974     def _calc_cookies(self, info_dict):
1975         pr = sanitized_Request(info_dict['url'])
1976         self.cookiejar.add_cookie_header(pr)
1977         return pr.get_header('Cookie')
1978
1979     def _sanitize_thumbnails(self, info_dict):
1980         thumbnails = info_dict.get('thumbnails')
1981         if thumbnails is None:
1982             thumbnail = info_dict.get('thumbnail')
1983             if thumbnail:
1984                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1985         if thumbnails:
1986             thumbnails.sort(key=lambda t: (
1987                 t.get('preference') if t.get('preference') is not None else -1,
1988                 t.get('width') if t.get('width') is not None else -1,
1989                 t.get('height') if t.get('height') is not None else -1,
1990                 t.get('id') if t.get('id') is not None else '',
1991                 t.get('url')))
1992
1993             def thumbnail_tester():
1994                 if self.params.get('check_formats'):
1995                     test_all = True
1996                     to_screen = lambda msg: self.to_screen(f'[info] {msg}')
1997                 else:
1998                     test_all = False
1999                     to_screen = self.write_debug
2000
2001                 def test_thumbnail(t):
2002                     if not test_all and not t.get('_test_url'):
2003                         return True
2004                     to_screen('Testing thumbnail %s' % t['id'])
2005                     try:
2006                         self.urlopen(HEADRequest(t['url']))
2007                     except network_exceptions as err:
2008                         to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2009                             t['id'], t['url'], error_to_compat_str(err)))
2010                         return False
2011                     return True
2012
2013                 return test_thumbnail
2014
2015             for i, t in enumerate(thumbnails):
2016                 if t.get('id') is None:
2017                     t['id'] = '%d' % i
2018                 if t.get('width') and t.get('height'):
2019                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
2020                 t['url'] = sanitize_url(t['url'])
2021
2022             if self.params.get('check_formats') is not False:
2023                 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2024             else:
2025                 info_dict['thumbnails'] = thumbnails
2026
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video result, select its formats and process them.

        info_dict must be of type 'video' and contain 'id' and 'title'. Its
        fields (thumbnails, dates, live status, subtitles, formats, ...) are
        normalised, the format selector is run over the available formats and
        process_info() is called once per selected format when download is
        true.

        Returns the info_dict updated with the last selected format, or None
        when only a listing (thumbnails/formats/subtitles) was requested.

        Raises ExtractorError if 'id' or 'title' is missing, if there are no
        formats, or if the requested format is not available (the last two
        only when the 'ignore_no_formats_error' param is not set).
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        # Warn that an extractor returned a field of the wrong type
        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        # Coerce info[string_field] to a string (with a warning) if it is set
        # but not already a string
        def sanitize_string_field(info, string_field):
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        # Coerce every field listed in self._NUMERIC_FIELDS to an int (or None
        # if that fails), with a warning, when it is set but not numeric
        def sanitize_numeric_fields(info):
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Keep an explicitly given 'thumbnail'; otherwise fall back to the URL
        # of the last entry of the sanitized thumbnails list
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive the YYYYMMDD date fields from their (UTC) timestamps when missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile 'live_status' with the boolean 'is_live'/'was_live' fields
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            # The first key that is not explicitly False decides: a truthy
            # value selects that status, a missing/None value stops the search
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            # Fill in the booleans that the extractor left unset
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and guess missing extensions from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        # NOTE(review): this reads subtitle_format['url'] even when the
                        # check above found no URL - confirm that entries without a
                        # URL always carry an 'ext'
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('No video formats found!')
            else:
                self.report_warning('No video formats found!')

        # A format is usable only if it has a non-empty 'url'; bytes URLs are
        # converted to strings
        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the list of formats sharing it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                # Fall back to the position in the list
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            # Build a human readable 'format' description when missing
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            # (the headers are computed from the video-level info overlaid
            # with the format's own fields)
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats and formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # When any listing option is set, print the requested lists and stop
        # here without selecting or downloading anything
        list_only = self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')
        if list_only:
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            if self.params.get('list_thumbnails'):
                self.list_thumbnails(info_dict)
            if self.params.get('listformats'):
                if not info_dict.get('formats'):
                    raise ExtractorError('No video formats found', expected=True)
                self.list_formats(info_dict)
            if self.params.get('listsubtitles'):
                if 'automatic_captions' in info_dict:
                    self.list_subtitles(
                        info_dict['id'], automatic_captions, 'automatic captions')
                self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        # Use the selector configured on the instance, or build one from the
        # default format spec for this video
        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True)
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            # Each selected format is processed with its own shallow copy of
            # the video info, overlaid with the format's fields
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2282
2283     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2284         """Select the requested subtitles and their format"""
2285         available_subs = {}
2286         if normal_subtitles and self.params.get('writesubtitles'):
2287             available_subs.update(normal_subtitles)
2288         if automatic_captions and self.params.get('writeautomaticsub'):
2289             for lang, cap_info in automatic_captions.items():
2290                 if lang not in available_subs:
2291                     available_subs[lang] = cap_info
2292
2293         if (not self.params.get('writesubtitles') and not
2294                 self.params.get('writeautomaticsub') or not
2295                 available_subs):
2296             return None
2297
2298         all_sub_langs = available_subs.keys()
2299         if self.params.get('allsubtitles', False):
2300             requested_langs = all_sub_langs
2301         elif self.params.get('subtitleslangs', False):
2302             requested_langs = set()
2303             for lang in self.params.get('subtitleslangs'):
2304                 if lang == 'all':
2305                     requested_langs.update(all_sub_langs)
2306                     continue
2307                 discard = lang[0] == '-'
2308                 if discard:
2309                     lang = lang[1:]
2310                 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2311                 if discard:
2312                     for lang in current_langs:
2313                         requested_langs.discard(lang)
2314                 else:
2315                     requested_langs.update(current_langs)
2316         elif 'en' in available_subs:
2317             requested_langs = ['en']
2318         else:
2319             requested_langs = [list(all_sub_langs)[0]]
2320         self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2321
2322         formats_query = self.params.get('subtitlesformat', 'best')
2323         formats_preference = formats_query.split('/') if formats_query else []
2324         subs = {}
2325         for lang in requested_langs:
2326             formats = available_subs.get(lang)
2327             if formats is None:
2328                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2329                 continue
2330             for ext in formats_preference:
2331                 if ext == 'best':
2332                     f = formats[-1]
2333                     break
2334                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2335                 if matches:
2336                     f = matches[-1]
2337                     break
2338             else:
2339                 f = formats[-1]
2340                 self.report_warning(
2341                     'No subtitle format found matching "%s" for language %s, '
2342                     'using %s' % (formats_query, lang, f['ext']))
2343             subs[lang] = f
2344         return subs
2345
2346     def __forced_printings(self, info_dict, filename, incomplete):
2347         def print_mandatory(field, actual_field=None):
2348             if actual_field is None:
2349                 actual_field = field
2350             if (self.params.get('force%s' % field, False)
2351                     and (not incomplete or info_dict.get(actual_field) is not None)):
2352                 self.to_stdout(info_dict[actual_field])
2353
2354         def print_optional(field):
2355             if (self.params.get('force%s' % field, False)
2356                     and info_dict.get(field) is not None):
2357                 self.to_stdout(info_dict[field])
2358
2359         info_dict = info_dict.copy()
2360         if filename is not None:
2361             info_dict['filename'] = filename
2362         if info_dict.get('requested_formats') is not None:
2363             # For RTMP URLs, also include the playpath
2364             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2365         elif 'url' in info_dict:
2366             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2367
2368         for tmpl in self.params.get('forceprint', []):
2369             if re.match(r'\w+$', tmpl):
2370                 tmpl = '%({})s'.format(tmpl)
2371             tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2372             self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)
2373
2374         print_mandatory('title')
2375         print_mandatory('id')
2376         print_mandatory('url', 'urls')
2377         print_optional('thumbnail')
2378         print_optional('description')
2379         print_optional('filename')
2380         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2381             self.to_stdout(formatSeconds(info_dict['duration']))
2382         print_mandatory('format')
2383
2384         if self.params.get('forcejson', False):
2385             self.post_extract(info_dict)
2386             self.to_stdout(json.dumps(info_dict, default=repr))
2387
2388     def dl(self, name, info, subtitle=False, test=False):
2389
2390         if test:
2391             verbose = self.params.get('verbose')
2392             params = {
2393                 'test': True,
2394                 'quiet': not verbose,
2395                 'verbose': verbose,
2396                 'noprogress': not verbose,
2397                 'nopart': True,
2398                 'skip_unavailable_fragments': False,
2399                 'keep_fragments': False,
2400                 'overwrites': True,
2401                 '_no_ytdl_file': True,
2402             }
2403         else:
2404             params = self.params
2405         fd = get_suitable_downloader(info, params)(self, params)
2406         if not test:
2407             for ph in self._progress_hooks:
2408                 fd.add_progress_hook(ph)
2409             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2410             self.write_debug('Invoking downloader on "%s"' % urls)
2411         new_info = dict(info)
2412         if new_info.get('http_headers') is None:
2413             new_info['http_headers'] = self._calc_headers(new_info)
2414         return fd.download(name, new_info, subtitle)
2415
2416     def process_info(self, info_dict):
2417         """Process a single resolved IE result."""
2418
2419         assert info_dict.get('_type', 'video') == 'video'
2420
2421         info_dict.setdefault('__postprocessors', [])
2422
2423         max_downloads = self.params.get('max_downloads')
2424         if max_downloads is not None:
2425             if self._num_downloads >= int(max_downloads):
2426                 raise MaxDownloadsReached()
2427
2428         # TODO: backward compatibility, to be removed
2429         info_dict['fulltitle'] = info_dict['title']
2430
2431         if 'format' not in info_dict and 'ext' in info_dict:
2432             info_dict['format'] = info_dict['ext']
2433
2434         if self._match_entry(info_dict) is not None:
2435             return
2436
2437         self.post_extract(info_dict)
2438         self._num_downloads += 1
2439
2440         # info_dict['_filename'] needs to be set for backward compatibility
2441         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2442         temp_filename = self.prepare_filename(info_dict, 'temp')
2443         files_to_move = {}
2444
2445         # Forced printings
2446         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2447
2448         if self.params.get('simulate', False):
2449             if self.params.get('force_write_download_archive', False):
2450                 self.record_download_archive(info_dict)
2451
2452             # Do nothing else if in simulate mode
2453             return
2454
2455         if full_filename is None:
2456             return
2457
2458         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2459             return
2460         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2461             return
2462
2463         if self.params.get('writedescription', False):
2464             descfn = self.prepare_filename(info_dict, 'description')
2465             if not self._ensure_dir_exists(encodeFilename(descfn)):
2466                 return
2467             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2468                 self.to_screen('[info] Video description is already present')
2469             elif info_dict.get('description') is None:
2470                 self.report_warning('There\'s no description to write.')
2471             else:
2472                 try:
2473                     self.to_screen('[info] Writing video description to: ' + descfn)
2474                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2475                         descfile.write(info_dict['description'])
2476                 except (OSError, IOError):
2477                     self.report_error('Cannot write description file ' + descfn)
2478                     return
2479
2480         if self.params.get('writeannotations', False):
2481             annofn = self.prepare_filename(info_dict, 'annotation')
2482             if not self._ensure_dir_exists(encodeFilename(annofn)):
2483                 return
2484             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2485                 self.to_screen('[info] Video annotations are already present')
2486             elif not info_dict.get('annotations'):
2487                 self.report_warning('There are no annotations to write.')
2488             else:
2489                 try:
2490                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2491                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2492                         annofile.write(info_dict['annotations'])
2493                 except (KeyError, TypeError):
2494                     self.report_warning('There are no annotations to write.')
2495                 except (OSError, IOError):
2496                     self.report_error('Cannot write annotations file: ' + annofn)
2497                     return
2498
2499         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2500                                        self.params.get('writeautomaticsub')])
2501
2502         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2503             # subtitles download errors are already managed as troubles in relevant IE
2504             # that way it will silently go on when used with unsupporting IE
2505             subtitles = info_dict['requested_subtitles']
2506             # ie = self.get_info_extractor(info_dict['extractor_key'])
2507             for sub_lang, sub_info in subtitles.items():
2508                 sub_format = sub_info['ext']
2509                 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2510                 sub_filename_final = subtitles_filename(
2511                     self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2512                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2513                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2514                     sub_info['filepath'] = sub_filename
2515                     files_to_move[sub_filename] = sub_filename_final
2516                 else:
2517                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2518                     if sub_info.get('data') is not None:
2519                         try:
2520                             # Use newline='' to prevent conversion of newline characters
2521                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2522                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2523                                 subfile.write(sub_info['data'])
2524                             sub_info['filepath'] = sub_filename
2525                             files_to_move[sub_filename] = sub_filename_final
2526                         except (OSError, IOError):
2527                             self.report_error('Cannot write subtitles file ' + sub_filename)
2528                             return
2529                     else:
2530                         try:
2531                             self.dl(sub_filename, sub_info.copy(), subtitle=True)
2532                             sub_info['filepath'] = sub_filename
2533                             files_to_move[sub_filename] = sub_filename_final
2534                         except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2535                             self.report_warning('Unable to download subtitle for "%s": %s' %
2536                                                 (sub_lang, error_to_compat_str(err)))
2537                             continue
2538
2539         if self.params.get('writeinfojson', False):
2540             infofn = self.prepare_filename(info_dict, 'infojson')
2541             if not self._ensure_dir_exists(encodeFilename(infofn)):
2542                 return
2543             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2544                 self.to_screen('[info] Video metadata is already present')
2545             else:
2546                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2547                 try:
2548                     write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2549                 except (OSError, IOError):
2550                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2551                     return
2552             info_dict['__infojson_filename'] = infofn
2553
2554         for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2555             thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2556             thumb_filename = replace_extension(
2557                 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2558             files_to_move[thumb_filename_temp] = thumb_filename
2559
2560         # Write internet shortcut files
2561         url_link = webloc_link = desktop_link = False
2562         if self.params.get('writelink', False):
2563             if sys.platform == "darwin":  # macOS.
2564                 webloc_link = True
2565             elif sys.platform.startswith("linux"):
2566                 desktop_link = True
2567             else:  # if sys.platform in ['win32', 'cygwin']:
2568                 url_link = True
2569         if self.params.get('writeurllink', False):
2570             url_link = True
2571         if self.params.get('writewebloclink', False):
2572             webloc_link = True
2573         if self.params.get('writedesktoplink', False):
2574             desktop_link = True
2575
2576         if url_link or webloc_link or desktop_link:
2577             if 'webpage_url' not in info_dict:
2578                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2579                 return
2580             ascii_url = iri_to_uri(info_dict['webpage_url'])
2581
2582         def _write_link_file(extension, template, newline, embed_filename):
2583             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2584             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2585                 self.to_screen('[info] Internet shortcut is already present')
2586             else:
2587                 try:
2588                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2589                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2590                         template_vars = {'url': ascii_url}
2591                         if embed_filename:
2592                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2593                         linkfile.write(template % template_vars)
2594                 except (OSError, IOError):
2595                     self.report_error('Cannot write internet shortcut ' + linkfn)
2596                     return False
2597             return True
2598
2599         if url_link:
2600             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2601                 return
2602         if webloc_link:
2603             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2604                 return
2605         if desktop_link:
2606             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2607                 return
2608
2609         try:
2610             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2611         except PostProcessingError as err:
2612             self.report_error('Preprocessing: %s' % str(err))
2613             return
2614
2615         must_record_download_archive = False
2616         if self.params.get('skip_download', False):
2617             info_dict['filepath'] = temp_filename
2618             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2619             info_dict['__files_to_move'] = files_to_move
2620             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2621         else:
2622             # Download
2623             try:
2624
2625                 def existing_file(*filepaths):
2626                     ext = info_dict.get('ext')
2627                     final_ext = self.params.get('final_ext', ext)
2628                     existing_files = []
2629                     for file in orderedSet(filepaths):
2630                         if final_ext != ext:
2631                             converted = replace_extension(file, final_ext, ext)
2632                             if os.path.exists(encodeFilename(converted)):
2633                                 existing_files.append(converted)
2634                         if os.path.exists(encodeFilename(file)):
2635                             existing_files.append(file)
2636
2637                     if not existing_files or self.params.get('overwrites', False):
2638                         for file in orderedSet(existing_files):
2639                             self.report_file_delete(file)
2640                             os.remove(encodeFilename(file))
2641                         return None
2642
2643                     self.report_file_already_downloaded(existing_files[0])
2644                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2645                     return existing_files[0]
2646
2647                 success = True
2648                 if info_dict.get('requested_formats') is not None:
2649
2650                     def compatible_formats(formats):
2651                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2652                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2653                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2654                         if len(video_formats) > 2 or len(audio_formats) > 2:
2655                             return False
2656
2657                         # Check extension
2658                         exts = set(format.get('ext') for format in formats)
2659                         COMPATIBLE_EXTS = (
2660                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2661                             set(('webm',)),
2662                         )
2663                         for ext_sets in COMPATIBLE_EXTS:
2664                             if ext_sets.issuperset(exts):
2665                                 return True
2666                         # TODO: Check acodec/vcodec
2667                         return False
2668
2669                     requested_formats = info_dict['requested_formats']
2670                     old_ext = info_dict['ext']
2671                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2672                         info_dict['ext'] = 'mkv'
2673                         self.report_warning(
2674                             'Requested formats are incompatible for merge and will be merged into mkv.')
2675
2676                     def correct_ext(filename):
2677                         filename_real_ext = os.path.splitext(filename)[1][1:]
2678                         filename_wo_ext = (
2679                             os.path.splitext(filename)[0]
2680                             if filename_real_ext == old_ext
2681                             else filename)
2682                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2683
2684                     # Ensure filename always has a correct extension for successful merge
2685                     full_filename = correct_ext(full_filename)
2686                     temp_filename = correct_ext(temp_filename)
2687                     dl_filename = existing_file(full_filename, temp_filename)
2688                     info_dict['__real_download'] = False
2689
2690                     _protocols = set(determine_protocol(f) for f in requested_formats)
2691                     if len(_protocols) == 1:
2692                         info_dict['protocol'] = _protocols.pop()
2693                     directly_mergable = (
2694                         'no-direct-merge' not in self.params.get('compat_opts', [])
2695                         and info_dict.get('protocol') is not None  # All requested formats have same protocol
2696                         and not self.params.get('allow_unplayable_formats')
2697                         and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2698                     if directly_mergable:
2699                         info_dict['url'] = requested_formats[0]['url']
2700                         # Treat it as a single download
2701                         dl_filename = existing_file(full_filename, temp_filename)
2702                         if dl_filename is None:
2703                             success, real_download = self.dl(temp_filename, info_dict)
2704                             info_dict['__real_download'] = real_download
2705                     else:
2706                         downloaded = []
2707                         merger = FFmpegMergerPP(self)
2708                         if self.params.get('allow_unplayable_formats'):
2709                             self.report_warning(
2710                                 'You have requested merging of multiple formats '
2711                                 'while also allowing unplayable formats to be downloaded. '
2712                                 'The formats won\'t be merged to prevent data corruption.')
2713                         elif not merger.available:
2714                             self.report_warning(
2715                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2716                                 'The formats won\'t be merged.')
2717
2718                         if dl_filename is None:
2719                             for f in requested_formats:
2720                                 new_info = dict(info_dict)
2721                                 del new_info['requested_formats']
2722                                 new_info.update(f)
2723                                 fname = prepend_extension(
2724                                     self.prepare_filename(new_info, 'temp'),
2725                                     'f%s' % f['format_id'], new_info['ext'])
2726                                 if not self._ensure_dir_exists(fname):
2727                                     return
2728                                 downloaded.append(fname)
2729                                 partial_success, real_download = self.dl(fname, new_info)
2730                                 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2731                                 success = success and partial_success
2732                             if merger.available and not self.params.get('allow_unplayable_formats'):
2733                                 info_dict['__postprocessors'].append(merger)
2734                                 info_dict['__files_to_merge'] = downloaded
2735                                 # Even if there were no downloads, it is being merged only now
2736                                 info_dict['__real_download'] = True
2737                             else:
2738                                 for file in downloaded:
2739                                     files_to_move[file] = None
2740                 else:
2741                     # Just a single file
2742                     dl_filename = existing_file(full_filename, temp_filename)
2743                     if dl_filename is None:
2744                         success, real_download = self.dl(temp_filename, info_dict)
2745                         info_dict['__real_download'] = real_download
2746
2747                 dl_filename = dl_filename or temp_filename
2748                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2749
2750             except network_exceptions as err:
2751                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2752                 return
2753             except (OSError, IOError) as err:
2754                 raise UnavailableVideoError(err)
2755             except (ContentTooShortError, ) as err:
2756                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2757                 return
2758
2759             if success and full_filename != '-':
2760
2761                 def fixup():
2762                     do_fixup = True
2763                     fixup_policy = self.params.get('fixup')
2764                     vid = info_dict['id']
2765
2766                     if fixup_policy in ('ignore', 'never'):
2767                         return
2768                     elif fixup_policy == 'warn':
2769                         do_fixup = False
2770                     elif fixup_policy != 'force':
2771                         assert fixup_policy in ('detect_or_warn', None)
2772                         if not info_dict.get('__real_download'):
2773                             do_fixup = False
2774
2775                     def ffmpeg_fixup(cndn, msg, cls):
2776                         if not cndn:
2777                             return
2778                         if not do_fixup:
2779                             self.report_warning(f'{vid}: {msg}')
2780                             return
2781                         pp = cls(self)
2782                         if pp.available:
2783                             info_dict['__postprocessors'].append(pp)
2784                         else:
2785                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2786
2787                     stretched_ratio = info_dict.get('stretched_ratio')
2788                     ffmpeg_fixup(
2789                         stretched_ratio not in (1, None),
2790                         f'Non-uniform pixel ratio {stretched_ratio}',
2791                         FFmpegFixupStretchedPP)
2792
2793                     ffmpeg_fixup(
2794                         (info_dict.get('requested_formats') is None
2795                          and info_dict.get('container') == 'm4a_dash'
2796                          and info_dict.get('ext') == 'm4a'),
2797                         'writing DASH m4a. Only some players support this container',
2798                         FFmpegFixupM4aPP)
2799
2800                     downloader = (get_suitable_downloader(info_dict, self.params).__name__
2801                                   if 'protocol' in info_dict else None)
2802                     ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2803                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2804                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2805
2806                 fixup()
2807                 try:
2808                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2809                 except PostProcessingError as err:
2810                     self.report_error('Postprocessing: %s' % str(err))
2811                     return
2812                 try:
2813                     for ph in self._post_hooks:
2814                         ph(info_dict['filepath'])
2815                 except Exception as err:
2816                     self.report_error('post hooks: %s' % str(err))
2817                     return
2818                 must_record_download_archive = True
2819
2820         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2821             self.record_download_archive(info_dict)
2822         max_downloads = self.params.get('max_downloads')
2823         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2824             raise MaxDownloadsReached()
2825
2826     def download(self, url_list):
2827         """Download a given list of URLs."""
2828         outtmpl = self.outtmpl_dict['default']
2829         if (len(url_list) > 1
2830                 and outtmpl != '-'
2831                 and '%' not in outtmpl
2832                 and self.params.get('max_downloads') != 1):
2833             raise SameFileError(outtmpl)
2834
2835         for url in url_list:
2836             try:
2837                 # It also downloads the videos
2838                 res = self.extract_info(
2839                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2840             except UnavailableVideoError:
2841                 self.report_error('unable to download video')
2842             except MaxDownloadsReached:
2843                 self.to_screen('[info] Maximum number of downloaded files reached')
2844                 raise
2845             except ExistingVideoReached:
2846                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2847                 raise
2848             except RejectedVideoReached:
2849                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2850                 raise
2851             else:
2852                 if self.params.get('dump_single_json', False):
2853                     self.post_extract(res)
2854                     self.to_stdout(json.dumps(res, default=repr))
2855
2856         return self._download_retcode
2857
2858     def download_with_info_file(self, info_filename):
2859         with contextlib.closing(fileinput.FileInput(
2860                 [info_filename], mode='r',
2861                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2862             # FileInput doesn't have a read method, we can't call json.load
2863             info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2864         try:
2865             self.process_ie_result(info, download=True)
2866         except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2867             webpage_url = info.get('webpage_url')
2868             if webpage_url is not None:
2869                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2870                 return self.download([webpage_url])
2871             else:
2872                 raise
2873         return self._download_retcode
2874
2875     @staticmethod
2876     def filter_requested_info(info_dict, actually_filter=True):
2877         remove_keys = ['__original_infodict']  # Always remove this since this may contain a copy of the entire dict
2878         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
2879         if actually_filter:
2880             remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2881             empty_values = (None, {}, [], set(), tuple())
2882             reject = lambda k, v: k not in keep_keys and (
2883                 k.startswith('_') or k in remove_keys or v in empty_values)
2884         else:
2885             info_dict['epoch'] = int(time.time())
2886             reject = lambda k, v: k in remove_keys
2887         filter_fn = lambda obj: (
2888             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2889             else obj if not isinstance(obj, dict)
2890             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2891         return filter_fn(info_dict)
2892
2893     def run_pp(self, pp, infodict):
2894         files_to_delete = []
2895         if '__files_to_move' not in infodict:
2896             infodict['__files_to_move'] = {}
2897         files_to_delete, infodict = pp.run(infodict)
2898         if not files_to_delete:
2899             return infodict
2900
2901         if self.params.get('keepvideo', False):
2902             for f in files_to_delete:
2903                 infodict['__files_to_move'].setdefault(f, '')
2904         else:
2905             for old_filename in set(files_to_delete):
2906                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2907                 try:
2908                     os.remove(encodeFilename(old_filename))
2909                 except (IOError, OSError):
2910                     self.report_warning('Unable to remove downloaded original file')
2911                 if old_filename in infodict['__files_to_move']:
2912                     del infodict['__files_to_move'][old_filename]
2913         return infodict
2914
2915     @staticmethod
2916     def post_extract(info_dict):
2917         def actual_post_extract(info_dict):
2918             if info_dict.get('_type') in ('playlist', 'multi_video'):
2919                 for video_dict in info_dict.get('entries', {}):
2920                     actual_post_extract(video_dict or {})
2921                 return
2922
2923             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2924             extra = post_extractor().items()
2925             info_dict.update(extra)
2926             info_dict.pop('__post_extractor', None)
2927
2928             original_infodict = info_dict.get('__original_infodict') or {}
2929             original_infodict.update(extra)
2930             original_infodict.pop('__post_extractor', None)
2931
2932         actual_post_extract(info_dict or {})
2933
2934     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2935         info = dict(ie_info)
2936         info['__files_to_move'] = files_to_move or {}
2937         for pp in self._pps[key]:
2938             info = self.run_pp(pp, info)
2939         return info, info.pop('__files_to_move', None)
2940
2941     def post_process(self, filename, ie_info, files_to_move=None):
2942         """Run all the postprocessors on the given file."""
2943         info = dict(ie_info)
2944         info['filepath'] = filename
2945         info['__files_to_move'] = files_to_move or {}
2946
2947         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2948             info = self.run_pp(pp, info)
2949         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2950         del info['__files_to_move']
2951         for pp in self._pps['after_move']:
2952             info = self.run_pp(pp, info)
2953         return info
2954
2955     def _make_archive_id(self, info_dict):
2956         video_id = info_dict.get('id')
2957         if not video_id:
2958             return
2959         # Future-proof against any change in case
2960         # and backwards compatibility with prior versions
2961         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2962         if extractor is None:
2963             url = str_or_none(info_dict.get('url'))
2964             if not url:
2965                 return
2966             # Try to find matching extractor for the URL and take its ie_key
2967             for ie in self._ies:
2968                 if ie.suitable(url):
2969                     extractor = ie.ie_key()
2970                     break
2971             else:
2972                 return
2973         return '%s %s' % (extractor.lower(), video_id)
2974
2975     def in_download_archive(self, info_dict):
2976         fn = self.params.get('download_archive')
2977         if fn is None:
2978             return False
2979
2980         vid_id = self._make_archive_id(info_dict)
2981         if not vid_id:
2982             return False  # Incomplete video information
2983
2984         return vid_id in self.archive
2985
2986     def record_download_archive(self, info_dict):
2987         fn = self.params.get('download_archive')
2988         if fn is None:
2989             return
2990         vid_id = self._make_archive_id(info_dict)
2991         assert vid_id
2992         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2993             archive_file.write(vid_id + '\n')
2994         self.archive.add(vid_id)
2995
2996     @staticmethod
2997     def format_resolution(format, default='unknown'):
2998         if format.get('vcodec') == 'none':
2999             if format.get('acodec') == 'none':
3000                 return 'images'
3001             return 'audio only'
3002         if format.get('resolution') is not None:
3003             return format['resolution']
3004         if format.get('width') and format.get('height'):
3005             res = '%dx%d' % (format['width'], format['height'])
3006         elif format.get('height'):
3007             res = '%sp' % format['height']
3008         elif format.get('width'):
3009             res = '%dx?' % format['width']
3010         else:
3011             res = default
3012         return res
3013
3014     def _format_note(self, fdict):
3015         res = ''
3016         if fdict.get('ext') in ['f4f', 'f4m']:
3017             res += '(unsupported) '
3018         if fdict.get('language'):
3019             if res:
3020                 res += ' '
3021             res += '[%s] ' % fdict['language']
3022         if fdict.get('format_note') is not None:
3023             res += fdict['format_note'] + ' '
3024         if fdict.get('tbr') is not None:
3025             res += '%4dk ' % fdict['tbr']
3026         if fdict.get('container') is not None:
3027             if res:
3028                 res += ', '
3029             res += '%s container' % fdict['container']
3030         if (fdict.get('vcodec') is not None
3031                 and fdict.get('vcodec') != 'none'):
3032             if res:
3033                 res += ', '
3034             res += fdict['vcodec']
3035             if fdict.get('vbr') is not None:
3036                 res += '@'
3037         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3038             res += 'video@'
3039         if fdict.get('vbr') is not None:
3040             res += '%4dk' % fdict['vbr']
3041         if fdict.get('fps') is not None:
3042             if res:
3043                 res += ', '
3044             res += '%sfps' % fdict['fps']
3045         if fdict.get('acodec') is not None:
3046             if res:
3047                 res += ', '
3048             if fdict['acodec'] == 'none':
3049                 res += 'video only'
3050             else:
3051                 res += '%-5s' % fdict['acodec']
3052         elif fdict.get('abr') is not None:
3053             if res:
3054                 res += ', '
3055             res += 'audio'
3056         if fdict.get('abr') is not None:
3057             res += '@%3dk' % fdict['abr']
3058         if fdict.get('asr') is not None:
3059             res += ' (%5dHz)' % fdict['asr']
3060         if fdict.get('filesize') is not None:
3061             if res:
3062                 res += ', '
3063             res += format_bytes(fdict['filesize'])
3064         elif fdict.get('filesize_approx') is not None:
3065             if res:
3066                 res += ', '
3067             res += '~' + format_bytes(fdict['filesize_approx'])
3068         return res
3069
3070     def list_formats(self, info_dict):
3071         formats = info_dict.get('formats', [info_dict])
3072         new_format = (
3073             'list-formats' not in self.params.get('compat_opts', [])
3074             and self.params.get('listformats_table', True) is not False)
3075         if new_format:
3076             table = [
3077                 [
3078                     format_field(f, 'format_id'),
3079                     format_field(f, 'ext'),
3080                     self.format_resolution(f),
3081                     format_field(f, 'fps', '%d'),
3082                     '|',
3083                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3084                     format_field(f, 'tbr', '%4dk'),
3085                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3086                     '|',
3087                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3088                     format_field(f, 'vbr', '%4dk'),
3089                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3090                     format_field(f, 'abr', '%3dk'),
3091                     format_field(f, 'asr', '%5dHz'),
3092                     ', '.join(filter(None, (
3093                         'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3094                         format_field(f, 'language', '[%s]'),
3095                         format_field(f, 'format_note'),
3096                         format_field(f, 'container', ignore=(None, f.get('ext'))),
3097                     ))),
3098                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3099             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
3100                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
3101         else:
3102             table = [
3103                 [
3104                     format_field(f, 'format_id'),
3105                     format_field(f, 'ext'),
3106                     self.format_resolution(f),
3107                     self._format_note(f)]
3108                 for f in formats
3109                 if f.get('preference') is None or f['preference'] >= -1000]
3110             header_line = ['format code', 'extension', 'resolution', 'note']
3111
3112         self.to_screen(
3113             '[info] Available formats for %s:' % info_dict['id'])
3114         self.to_stdout(render_table(
3115             header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3116
3117     def list_thumbnails(self, info_dict):
3118         thumbnails = list(info_dict.get('thumbnails'))
3119         if not thumbnails:
3120             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3121             return
3122
3123         self.to_screen(
3124             '[info] Thumbnails for %s:' % info_dict['id'])
3125         self.to_stdout(render_table(
3126             ['ID', 'width', 'height', 'URL'],
3127             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3128
3129     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3130         if not subtitles:
3131             self.to_screen('%s has no %s' % (video_id, name))
3132             return
3133         self.to_screen(
3134             'Available %s for %s:' % (name, video_id))
3135
3136         def _row(lang, formats):
3137             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3138             if len(set(names)) == 1:
3139                 names = [] if names[0] == 'unknown' else names[:1]
3140             return [lang, ', '.join(names), ', '.join(exts)]
3141
3142         self.to_stdout(render_table(
3143             ['Language', 'Name', 'Formats'],
3144             [_row(lang, formats) for lang, formats in subtitles.items()],
3145             hideEmpty=True))
3146
3147     def urlopen(self, req):
3148         """ Start an HTTP download """
3149         if isinstance(req, compat_basestring):
3150             req = sanitized_Request(req)
3151         return self._opener.open(req, timeout=self._socket_timeout)
3152
3153     def print_debug_header(self):
3154         if not self.params.get('verbose'):
3155             return
3156
3157         if type('') is not compat_str:
3158             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
3159             self.report_warning(
3160                 'Your Python is broken! Update to a newer and supported version')
3161
3162         stdout_encoding = getattr(
3163             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
3164         encoding_str = (
3165             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3166                 locale.getpreferredencoding(),
3167                 sys.getfilesystemencoding(),
3168                 stdout_encoding,
3169                 self.get_encoding()))
3170         write_string(encoding_str, encoding=None)
3171
3172         source = (
3173             '(exe)' if hasattr(sys, 'frozen')
3174             else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
3175             else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
3176             else '')
3177         self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
3178         if _LAZY_LOADER:
3179             self._write_string('[debug] Lazy loading extractors enabled\n')
3180         if _PLUGIN_CLASSES:
3181             self._write_string(
3182                 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
3183         if self.params.get('compat_opts'):
3184             self._write_string(
3185                 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3186         try:
3187             sp = subprocess.Popen(
3188                 ['git', 'rev-parse', '--short', 'HEAD'],
3189                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3190                 cwd=os.path.dirname(os.path.abspath(__file__)))
3191             out, err = process_communicate_or_kill(sp)
3192             out = out.decode().strip()
3193             if re.match('[0-9a-f]+', out):
3194                 self._write_string('[debug] Git HEAD: %s\n' % out)
3195         except Exception:
3196             try:
3197                 sys.exc_clear()
3198             except Exception:
3199                 pass
3200
3201         def python_implementation():
3202             impl_name = platform.python_implementation()
3203             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3204                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3205             return impl_name
3206
3207         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3208             platform.python_version(),
3209             python_implementation(),
3210             platform.architecture()[0],
3211             platform_name()))
3212
3213         exe_versions = FFmpegPostProcessor.get_versions(self)
3214         exe_versions['rtmpdump'] = rtmpdump_version()
3215         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3216         exe_str = ', '.join(
3217             '%s %s' % (exe, v)
3218             for exe, v in sorted(exe_versions.items())
3219             if v
3220         )
3221         if not exe_str:
3222             exe_str = 'none'
3223         self._write_string('[debug] exe versions: %s\n' % exe_str)
3224
3225         proxy_map = {}
3226         for handler in self._opener.handlers:
3227             if hasattr(handler, 'proxies'):
3228                 proxy_map.update(handler.proxies)
3229         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
3230
3231         if self.params.get('call_home', False):
3232             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3233             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
3234             return
3235             latest_version = self.urlopen(
3236                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3237             if version_tuple(latest_version) > version_tuple(__version__):
3238                 self.report_warning(
3239                     'You are using an outdated version (newest version: %s)! '
3240                     'See https://yt-dl.org/update if you need help updating.' %
3241                     latest_version)
3242
3243     def _setup_opener(self):
3244         timeout_val = self.params.get('socket_timeout')
3245         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3246
3247         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3248         opts_cookiefile = self.params.get('cookiefile')
3249         opts_proxy = self.params.get('proxy')
3250
3251         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3252
3253         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3254         if opts_proxy is not None:
3255             if opts_proxy == '':
3256                 proxies = {}
3257             else:
3258                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3259         else:
3260             proxies = compat_urllib_request.getproxies()
3261             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3262             if 'http' in proxies and 'https' not in proxies:
3263                 proxies['https'] = proxies['http']
3264         proxy_handler = PerRequestProxyHandler(proxies)
3265
3266         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3267         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3268         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3269         redirect_handler = YoutubeDLRedirectHandler()
3270         data_handler = compat_urllib_request_DataHandler()
3271
3272         # When passing our own FileHandler instance, build_opener won't add the
3273         # default FileHandler and allows us to disable the file protocol, which
3274         # can be used for malicious purposes (see
3275         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3276         file_handler = compat_urllib_request.FileHandler()
3277
3278         def file_open(*args, **kwargs):
3279             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3280         file_handler.file_open = file_open
3281
3282         opener = compat_urllib_request.build_opener(
3283             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3284
3285         # Delete the default user-agent header, which would otherwise apply in
3286         # cases where our custom HTTP handler doesn't come into play
3287         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3288         opener.addheaders = []
3289         self._opener = opener
3290
3291     def encode(self, s):
3292         if isinstance(s, bytes):
3293             return s  # Already encoded
3294
3295         try:
3296             return s.encode(self.get_encoding())
3297         except UnicodeEncodeError as err:
3298             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3299             raise
3300
3301     def get_encoding(self):
3302         encoding = self.params.get('encoding')
3303         if encoding is None:
3304             encoding = preferredencoding()
3305         return encoding
3306
3307     def _write_thumbnails(self, info_dict, filename):  # return the extensions
3308         write_all = self.params.get('write_all_thumbnails', False)
3309         thumbnails = []
3310         if write_all or self.params.get('writethumbnail', False):
3311             thumbnails = info_dict.get('thumbnails') or []
3312         multiple = write_all and len(thumbnails) > 1
3313
3314         ret = []
3315         for t in thumbnails[::-1]:
3316             thumb_ext = determine_ext(t['url'], 'jpg')
3317             suffix = '%s.' % t['id'] if multiple else ''
3318             thumb_display_id = '%s ' % t['id'] if multiple else ''
3319             thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3320
3321             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3322                 ret.append(suffix + thumb_ext)
3323                 t['filepath'] = thumb_filename
3324                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3325                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3326             else:
3327                 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3328                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3329                 try:
3330                     uf = self.urlopen(t['url'])
3331                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3332                         shutil.copyfileobj(uf, thumbf)
3333                     ret.append(suffix + thumb_ext)
3334                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3335                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3336                     t['filepath'] = thumb_filename
3337                 except network_exceptions as err:
3338                     self.report_warning('Unable to download thumbnail "%s": %s' %
3339                                         (t['url'], error_to_compat_str(err)))
3340             if ret and not write_all:
3341                 break
3342         return ret