yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import functools
  13 import io
  14 import itertools
  15 import json
  16 import locale
  17 import operator
  18 import os
  19 import platform
  20 import re
  21 import shutil
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26 import tokenize
  27 import traceback
  28 import random
  29 import unicodedata
  30
  31 from enum import Enum
  32 from string import ascii_letters
  33
  34 from .compat import (
  35     compat_basestring,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DOT_DESKTOP_LINK_TEMPLATE,
  60     DOT_URL_LINK_TEMPLATE,
  61     DOT_WEBLOC_LINK_TEMPLATE,
  62     DownloadError,
  63     encode_compat_str,
  64     encodeFilename,
  65     EntryNotInPlaylist,
  66     error_to_compat_str,
  67     ExistingVideoReached,
  68     expand_path,
  69     ExtractorError,
  70     float_or_none,
  71     format_bytes,
  72     format_field,
  73     formatSeconds,
  74     GeoRestrictedError,
  75     HEADRequest,
  76     int_or_none,
  77     iri_to_uri,
  78     ISO3166Utils,
  79     LazyList,
  80     locked_file,
  81     make_dir,
  82     make_HTTPS_handler,
  83     MaxDownloadsReached,
  84     network_exceptions,
  85     number_of_digits,
  86     orderedSet,
  87     OUTTMPL_TYPES,
  88     PagedList,
  89     parse_filesize,
  90     PerRequestProxyHandler,
  91     platform_name,
  92     Popen,
  93     PostProcessingError,
  94     preferredencoding,
  95     prepend_extension,
  96     register_socks_protocols,
  97     RejectedVideoReached,
  98     render_table,
  99     replace_extension,
 100     SameFileError,
 101     sanitize_filename,
 102     sanitize_path,
 103     sanitize_url,
 104     sanitized_Request,
 105     std_headers,
 106     STR_FORMAT_RE_TMPL,
 107     STR_FORMAT_TYPES,
 108     str_or_none,
 109     strftime_or_none,
 110     subtitles_filename,
 111     supports_terminal_sequences,
 112     ThrottledDownload,
 113     to_high_limit_path,
 114     traverse_obj,
 115     try_get,
 116     UnavailableVideoError,
 117     url_basename,
 118     variadic,
 119     version_tuple,
 120     write_json_file,
 121     write_string,
 122     YoutubeDLCookieProcessor,
 123     YoutubeDLHandler,
 124     YoutubeDLRedirectHandler,
 125 )
 126 from .cache import Cache
 127 from .minicurses import format_text
 128 from .extractor import (
 129     gen_extractor_classes,
 130     get_info_extractor,
 131     _LAZY_LOADER,
 132     _PLUGIN_CLASSES as plugin_extractors
 133 )
 134 from .extractor.openload import PhantomJSwrapper
 135 from .downloader import (
 136     FFmpegFD,
 137     get_suitable_downloader,
 138     shorten_protocol_name
 139 )
 140 from .downloader.rtmp import rtmpdump_version
 141 from .postprocessor import (
 142     get_postprocessor,
 143     EmbedThumbnailPP,
 144     FFmpegFixupDurationPP,
 145     FFmpegFixupM3u8PP,
 146     FFmpegFixupM4aPP,
 147     FFmpegFixupStretchedPP,
 148     FFmpegFixupTimestampPP,
 149     FFmpegMergerPP,
 150     FFmpegPostProcessor,
 151     MoveFilesAfterDownloadPP,
 152     _PLUGIN_CLASSES as plugin_postprocessors
 153 )
 154 from .update import detect_variant
 155 from .version import __version__
 156
 157 if compat_os_name == 'nt':
 158     import ctypes
 159
 160
 161 class YoutubeDL(object):
 162     """YoutubeDL class.
 163
 164     YoutubeDL objects are the ones responsible for downloading the
 165     actual video file and writing it to disk if the user has requested
 166     it, among some other tasks. In most cases there should be one per
 167     program. As, given a video URL, the downloader doesn't know how to
 168     extract all the needed information, task that InfoExtractors do, it
 169     has to pass the URL to one of them.
 170
 171     For this, YoutubeDL objects have a method that allows
 172     InfoExtractors to be registered in a given order. When it is passed
 173     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 174     finds that reports being able to handle it. The InfoExtractor extracts
 175     all the information about the video or videos the URL refers to, and
 176     YoutubeDL processes the extracted information, possibly using a File
 177     Downloader to download the video.
 178
 179     YoutubeDL objects accept a lot of parameters. In order not to saturate
 180     the object constructor with arguments, it receives a dictionary of
 181     options instead. These options are available through the params
 182     attribute for the InfoExtractors to use. The YoutubeDL also
 183     registers itself as the downloader in charge for the InfoExtractors
 184     that are added to it, so this is a "mutual registration".
 185
 186     Available options:
 187
 188     username:          Username for authentication purposes.
 189     password:          Password for authentication purposes.
 190     videopassword:     Password for accessing a video.
 191     ap_mso:            Adobe Pass multiple-system operator identifier.
 192     ap_username:       Multiple-system operator account username.
 193     ap_password:       Multiple-system operator account password.
 194     usenetrc:          Use netrc for authentication instead.
 195     verbose:           Print additional info to stdout.
 196     quiet:             Do not print messages to stdout.
 197     no_warnings:       Do not print out anything for warnings.
 198     forceprint:        A list of templates to force print
 199     forceurl:          Force printing final URL. (Deprecated)
 200     forcetitle:        Force printing title. (Deprecated)
 201     forceid:           Force printing ID. (Deprecated)
 202     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 203     forcedescription:  Force printing description. (Deprecated)
 204     forcefilename:     Force printing final filename. (Deprecated)
 205     forceduration:     Force printing duration. (Deprecated)
 206     forcejson:         Force printing info_dict as JSON.
 207     dump_single_json:  Force printing the info_dict of the whole playlist
 208                        (or video) as a single JSON line.
 209     force_write_download_archive: Force writing download archive regardless
 210                        of 'skip_download' or 'simulate'.
 211     simulate:          Do not download the video files. If unset (or None),
 212                        simulate only if listsubtitles, listformats or list_thumbnails is used
 213     format:            Video format code. see "FORMAT SELECTION" for more details.
 214     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 215     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 216                        extracting metadata even if the video is not actually
 217                        available for download (experimental)
 218     format_sort:       How to sort the video formats. see "Sorting Formats"
 219                        for more details.
 220     format_sort_force: Force the given format_sort. see "Sorting Formats"
 221                        for more details.
 222     allow_multiple_video_streams:   Allow multiple video streams to be merged
 223                        into a single file
 224     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 225                        into a single file
 226     check_formats:     Whether to test if the formats are downloadable.
 227                        Can be True (check all), False (check none),
 228                        'selected' (check selected formats),
 229                        or None (check only if requested by extractor)
 230     paths:             Dictionary of output paths. The allowed keys are 'home',
 231                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 232     outtmpl:           Dictionary of templates for output names. Allowed keys
 233                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 234                        For compatibility with youtube-dl, a single string can also be used
 235     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 236     restrictfilenames: Do not allow "&" and spaces in file names
 237     trim_file_name:    Limit length of filename (extension excluded)
 238     windowsfilenames:  Force the filenames to be windows compatible
 239     ignoreerrors:      Do not stop on download/postprocessing errors.
 240                        Can be 'only_download' to ignore only download errors.
 241                        Default is 'only_download' for CLI, but False for API
 242     skip_playlist_after_errors: Number of allowed failures until the rest of
 243                        the playlist is skipped
 244     force_generic_extractor: Force downloader to use the generic extractor
 245     overwrites:        Overwrite all video and metadata files if True,
 246                        overwrite only non-video files if None
 247                        and don't overwrite any file if False
 248                        For compatibility with youtube-dl,
 249                        "nooverwrites" may also be used instead
 250     playliststart:     Playlist item to start at.
 251     playlistend:       Playlist item to end at.
 252     playlist_items:    Specific indices of playlist to download.
 253     playlistreverse:   Download playlist items in reverse order.
 254     playlistrandom:    Download playlist items in random order.
 255     matchtitle:        Download only matching titles.
 256     rejecttitle:       Reject downloads for matching titles.
 257     logger:            Log messages to a logging.Logger instance.
 258     logtostderr:       Log messages to stderr instead of stdout.
 259     consoletitle:      Display progress in console window's titlebar.
 260     writedescription:  Write the video description to a .description file
 261     writeinfojson:     Write the video metadata to a .info.json file
 262     clean_infojson:    Remove private fields from the infojson
 263     getcomments:       Extract video comments. This will not be written to disk
 264                        unless writeinfojson is also given
 265     writeannotations:  Write the video annotations to a .annotations.xml file
 266     writethumbnail:    Write the thumbnail image to a file
 267     allow_playlist_files: Whether to write playlists' description, infojson etc
 268                        also to disk when using the 'write*' options
 269     write_all_thumbnails:  Write all thumbnail formats to files
 270     writelink:         Write an internet shortcut file, depending on the
 271                        current platform (.url/.webloc/.desktop)
 272     writeurllink:      Write a Windows internet shortcut file (.url)
 273     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 274     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 275     writesubtitles:    Write the video subtitles to a file
 276     writeautomaticsub: Write the automatically generated subtitles to a file
 277     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 278                        Downloads all the subtitles of the video
 279                        (requires writesubtitles or writeautomaticsub)
 280     listsubtitles:     Lists all available subtitles for the video
 281     subtitlesformat:   The format code for subtitles
 282     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 283                        The list may contain "all" to refer to all the available
 284                        subtitles. The language can be prefixed with a "-" to
 285                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 286     keepvideo:         Keep the video file after post-processing
 287     daterange:         A DateRange object, download only if the upload_date is in the range.
 288     skip_download:     Skip the actual download of the video file
 289     cachedir:          Location of the cache files in the filesystem.
 290                        False to disable filesystem cache.
 291     noplaylist:        Download single video instead of a playlist if in doubt.
 292     age_limit:         An integer representing the user's age in years.
 293                        Unsuitable videos for the given age are skipped.
 294     min_views:         An integer representing the minimum view count the video
 295                        must have in order to not be skipped.
 296                        Videos without view count information are always
 297                        downloaded. None for no limit.
 298     max_views:         An integer representing the maximum view count.
 299                        Videos that are more popular than that are not
 300                        downloaded.
 301                        Videos without view count information are always
 302                        downloaded. None for no limit.
 303     download_archive:  File name of a file where all downloads are recorded.
 304                        Videos already present in the file are not downloaded
 305                        again.
 306     break_on_existing: Stop the download process after attempting to download a
 307                        file that is in the archive.
 308     break_on_reject:   Stop the download process when encountering a video that
 309                        has been filtered out.
 310     cookiefile:        File name where cookies should be read from and dumped to
 311     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 312                        name/path from where cookies are loaded.
 313                        Eg: ('chrome', ) or ('vivaldi', 'default')
 314     nocheckcertificate:Do not verify SSL certificates
 315     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 316                        At the moment, this is only supported by YouTube.
 317     proxy:             URL of the proxy server to use
 318     geo_verification_proxy:  URL of the proxy to use for IP address verification
 319                        on geo-restricted sites.
 320     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 321     bidi_workaround:   Work around buggy terminals without bidirectional text
 322                        support, using fribidi
 323     debug_printtraffic:Print out sent and received HTTP traffic
 324     include_ads:       Download ads as well
 325     default_search:    Prepend this string if an input url is not valid.
 326                        'auto' for elaborate guessing
 327     encoding:          Use this encoding instead of the system-specified.
 328     extract_flat:      Do not resolve URLs, return the immediate result.
 329                        Pass in 'in_playlist' to only show this behavior for
 330                        playlist items.
 331     postprocessors:    A list of dictionaries, each with an entry
 332                        * key:  The name of the postprocessor. See
 333                                yt_dlp/postprocessor/__init__.py for a list.
 334                        * when: When to run the postprocessor. Can be one of
 335                                pre_process|before_dl|post_process|after_move.
 336                                Assumed to be 'post_process' if not given
 337     post_hooks:        Deprecated - Register a custom postprocessor instead
 338                        A list of functions that get called as the final step
 339                        for each video file, after all postprocessors have been
 340                        called. The filename will be passed as the only argument.
 341     progress_hooks:    A list of functions that get called on download
 342                        progress, with a dictionary with the entries
 343                        * status: One of "downloading", "error", or "finished".
 344                                  Check this first and ignore unknown values.
 345                        * info_dict: The extracted info_dict
 346
 347                        If status is one of "downloading", or "finished", the
 348                        following properties may also be present:
 349                        * filename: The final filename (always present)
 350                        * tmpfilename: The filename we're currently writing to
 351                        * downloaded_bytes: Bytes on disk
 352                        * total_bytes: Size of the whole file, None if unknown
 353                        * total_bytes_estimate: Guess of the eventual file size,
 354                                                None if unavailable.
 355                        * elapsed: The number of seconds since download started.
 356                        * eta: The estimated time in seconds, None if unknown
 357                        * speed: The download speed in bytes/second, None if
 358                                 unknown
 359                        * fragment_index: The counter of the currently
 360                                          downloaded video fragment.
 361                        * fragment_count: The number of fragments (= individual
 362                                          files that will be merged)
 363
 364                        Progress hooks are guaranteed to be called at least once
 365                        (with status "finished") if the download is successful.
 366     postprocessor_hooks:  A list of functions that get called on postprocessing
 367                        progress, with a dictionary with the entries
 368                        * status: One of "started", "processing", or "finished".
 369                                  Check this first and ignore unknown values.
 370                        * postprocessor: Name of the postprocessor
 371                        * info_dict: The extracted info_dict
 372
 373                        Postprocessor hooks are guaranteed to be called at least twice
 374                        (with status "started" and "finished") if the processing is successful.
 375     merge_output_format: Extension to use when merging formats.
 376     final_ext:         Expected final extension; used to detect when the file was
 377                        already downloaded and converted. "merge_output_format" is
 378                        replaced by this extension when given
 379     fixup:             Automatically correct known faults of the file.
 380                        One of:
 381                        - "never": do nothing
 382                        - "warn": only emit a warning
 383                        - "detect_or_warn": check whether we can do anything
 384                                            about it, warn otherwise (default)
 385     source_address:    Client-side IP address to bind to.
 386     call_home:         Boolean, true iff we are allowed to contact the
 387                        yt-dlp servers for debugging. (BROKEN)
 388     sleep_interval_requests: Number of seconds to sleep between requests
 389                        during extraction
 390     sleep_interval:    Number of seconds to sleep before each download when
 391                        used alone or a lower bound of a range for randomized
 392                        sleep before each download (minimum possible number
 393                        of seconds to sleep) when used along with
 394                        max_sleep_interval.
 395     max_sleep_interval:Upper bound of a range for randomized sleep before each
 396                        download (maximum possible number of seconds to sleep).
 397                        Must only be used along with sleep_interval.
 398                        Actual sleep time will be a random float from range
 399                        [sleep_interval; max_sleep_interval].
 400     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 401     listformats:       Print an overview of available video formats and exit.
 402     list_thumbnails:   Print a table of all thumbnails and exit.
 403     match_filter:      A function that gets called with the info_dict of
 404                        every video.
 405                        If it returns a message, the video is ignored.
 406                        If it returns None, the video is downloaded.
 407                        match_filter_func in utils.py is one example for this.
 408     no_color:          Do not emit color codes in output.
 409     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 410                        HTTP header
 411     geo_bypass_country:
 412                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 413                        explicit geographic restriction bypassing via faking
 414                        X-Forwarded-For HTTP header
 415     geo_bypass_ip_block:
 416                        IP range in CIDR notation that will be used similarly to
 417                        geo_bypass_country
 418
 419     The following options determine which downloader is picked:
 420     external_downloader: A dictionary of protocol keys and the executable of the
 421                        external downloader to use for it. The allowed protocols
 422                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 423                        Set the value to 'native' to use the native downloader
 424     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 425                        or {'m3u8': 'ffmpeg'} instead.
 426                        Use the native HLS downloader instead of ffmpeg/avconv
 427                        if True, otherwise use ffmpeg/avconv if False, otherwise
 428                        use downloader suggested by extractor if None.
 429     compat_opts:       Compatibility options. See "Differences in default behavior".
 430                        The following options do not work when used through the API:
 431                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 432                        no-clean-infojson, no-playlist-metafiles, no-keep-subs.
 433                        Refer to __init__.py for their implementation
 434     progress_template: Dictionary of templates for progress outputs.
 435                        Allowed keys are 'download', 'postprocess',
 436                        'download-title' (console title) and 'postprocess-title'.
 437                        The template is mapped on a dictionary with keys 'progress' and 'info'
 438
 439     The following parameters are not used by YoutubeDL itself, they are used by
 440     the downloader (see yt_dlp/downloader/common.py):
 441     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 442     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 443     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 444     external_downloader_args.
 445
 446     The following options are used by the post processors:
 447     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 448                        otherwise prefer ffmpeg. (avconv support is deprecated)
 449     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 450                        to the binary or its containing directory.
 451     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 452                        and a list of additional command-line arguments for the
 453                        postprocessor/executable. The dict can also have "PP+EXE" keys
 454                        which are used when the given exe is used by the given PP.
 455                        Use 'default' as the name for arguments to be passed to all PPs
 456                        For compatibility with youtube-dl, a single list of args
 457                        can also be used
 458
 459     The following options are used by the extractors:
 460     extractor_retries: Number of times to retry for known errors
 461     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 462     hls_split_discontinuity: Split HLS playlists to different formats at
 463                        discontinuities such as ad breaks (default: False)
 464     extractor_args:    A dictionary of arguments to be passed to the extractors.
 465                        See "EXTRACTOR ARGUMENTS" for details.
 466                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 467     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 468                        If True (default), DASH manifests and related
 469                        data will be downloaded and processed by extractor.
 470                        You can reduce network I/O by disabling it if you don't
 471                        care about DASH. (only for youtube)
 472     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 473                        If True (default), HLS manifests and related
 474                        data will be downloaded and processed by extractor.
 475                        You can reduce network I/O by disabling it if you don't
 476                        care about HLS. (only for youtube)
 477     """
 478
 479     _NUMERIC_FIELDS = set((  # NOTE(review): presumably the info_dict field names whose values are numbers; the use site is outside this view -- confirm
 480         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 481         'timestamp', 'release_timestamp',
 482         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 483         'average_rating', 'comment_count', 'age_limit',
 484         'start_time', 'end_time',
 485         'chapter_number', 'season_number', 'episode_number',
 486         'track_number', 'disc_number', 'release_year',
 487     ))
 488
 489     _format_selection_exts = {  # file extensions grouped by media kind; NOTE(review): presumably consulted during format selection -- confirm at the use site
 490         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 491         'video': {'mp4', 'flv', 'webm', '3gp'},
 492         'storyboards': {'mhtml'},
 493     }
 494     # Class-level defaults; the trailing notes say which ones __init__ rebinds on the instance
 495     params = None  # __init__ stores the options dict here ({} when none is given)
 496     _ies = {}  # rebound to a fresh dict per instance in __init__
 497     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}  # rebound to a fresh dict of lists per instance in __init__
 498     _printed_messages = set()  # rebound to a fresh set per instance in __init__
 499     _first_webpage_request = True  # reset to True per instance in __init__
 500     _download_retcode = None  # set to 0 in __init__
 501     _num_downloads = None  # set to 0 in __init__
 502     _playlist_level = 0  # NOTE(review): not rebound in the visible part of __init__ -- confirm where it is updated
 503     _playlist_urls = set()  # NOTE(review): mutable class attribute, not rebound in the visible part of __init__; may be shared across instances -- confirm
 504     _screen_file = None  # __init__ sets this to sys.stdout, or sys.stderr when params['logtostderr'] is true
 505
 506     def __init__(self, params=None, auto_init=True):
 507         """Create a FileDownloader object with the given options.
 508         @param auto_init    Whether to load the default extractors and print header (if verbose).
 509                             Set to 'no_verbose_header' to not print the header
 510         """
 511         if params is None:
 512             params = {}
 513         self._ies = {}
 514         self._ies_instances = {}
 515         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 516         self._printed_messages = set()
 517         self._first_webpage_request = True
 518         self._post_hooks = []
 519         self._progress_hooks = []
 520         self._postprocessor_hooks = []
 521         self._download_retcode = 0
 522         self._num_downloads = 0
 523         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 524         self._err_file = sys.stderr
 525         self.params = params
 526         self.cache = Cache(self)
 527
 528         windows_enable_vt_mode()
 529         # FIXME: This will break if we ever print color to stdout
 530         self._allow_colors = {
 531             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 532             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 533         }
 534
 535         if sys.version_info < (3, 6):
 536             self.report_warning(
 537                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 538
 539         if self.params.get('allow_unplayable_formats'):
 540             self.report_warning(
 541                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 542                 'This is a developer option intended for debugging. \n'
 543                 '         If you experience any issues while using this option, '
 544                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 545
 546         def check_deprecated(param, option, suggestion):
 547             if self.params.get(param) is not None:
 548                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 549                 return True
 550             return False
 551
 552         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 553             if self.params.get('geo_verification_proxy') is None:
 554                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 555
 556         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 557         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 558         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 559
 560         for msg in self.params.get('_warnings', []):
 561             self.report_warning(msg)
 562
 563         if 'list-formats' in self.params.get('compat_opts', []):
 564             self.params['listformats_table'] = False
 565
 566         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 567             # nooverwrites was unnecessarily changed to overwrites
 568             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 569             # This ensures compatibility with both keys
 570             self.params['overwrites'] = not self.params['nooverwrites']
 571         elif self.params.get('overwrites') is None:
 572             self.params.pop('overwrites', None)
 573         else:
 574             self.params['nooverwrites'] = not self.params['overwrites']
 575
 576         if params.get('bidi_workaround', False):
 577             try:
 578                 import pty
 579                 master, slave = pty.openpty()
 580                 width = compat_get_terminal_size().columns
 581                 if width is None:
 582                     width_args = []
 583                 else:
 584                     width_args = ['-w', str(width)]
 585                 sp_kwargs = dict(
 586                     stdin=subprocess.PIPE,
 587                     stdout=slave,
 588                     stderr=self._err_file)
 589                 try:
 590                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 591                 except OSError:
 592                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 593                 self._output_channel = os.fdopen(master, 'rb')
 594             except OSError as ose:
 595                 if ose.errno == errno.ENOENT:
 596                     self.report_warning(
 597                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 598                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 599                 else:
 600                     raise
 601
 602         if (sys.platform != 'win32'
 603                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 604                 and not params.get('restrictfilenames', False)):
 605             # Unicode filesystem API will throw errors (#1474, #13027)
 606             self.report_warning(
 607                 'Assuming --restrict-filenames since file system encoding '
 608                 'cannot encode all characters. '
 609                 'Set the LC_ALL environment variable to fix this.')
 610             self.params['restrictfilenames'] = True
 611
 612         self.outtmpl_dict = self.parse_outtmpl()
 613
 614         # Creating format selector here allows us to catch syntax errors before the extraction
 615         self.format_selector = (
 616             None if self.params.get('format') is None
 617             else self.build_format_selector(self.params['format']))
 618
 619         self._setup_opener()
 620
 621         if auto_init:
 622             if auto_init != 'no_verbose_header':
 623                 self.print_debug_header()
 624             self.add_default_info_extractors()
 625
 626         for pp_def_raw in self.params.get('postprocessors', []):
 627             pp_def = dict(pp_def_raw)
 628             when = pp_def.pop('when', 'post_process')
 629             pp_class = get_postprocessor(pp_def.pop('key'))
 630             pp = pp_class(self, **compat_kwargs(pp_def))
 631             self.add_post_processor(pp, when=when)
 632
 633         for ph in self.params.get('post_hooks', []):
 634             self.add_post_hook(ph)
 635
 636         for ph in self.params.get('progress_hooks', []):
 637             self.add_progress_hook(ph)
 638
 639         register_socks_protocols()
 640
 641         def preload_download_archive(fn):
 642             """Preload the archive, if any is specified"""
 643             if fn is None:
 644                 return False
 645             self.write_debug(f'Loading archive file {fn!r}')
 646             try:
 647                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 648                     for line in archive_file:
 649                         self.archive.add(line.strip())
 650             except IOError as ioe:
 651                 if ioe.errno != errno.ENOENT:
 652                     raise
 653                 return False
 654             return True
 655
 656         self.archive = set()
 657         preload_download_archive(self.params.get('download_archive'))
 658
 659     def warn_if_short_id(self, argv):
 660         # short YouTube ID starting with dash?
 661         idxs = [
 662             i for i, a in enumerate(argv)
 663             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 664         if idxs:
 665             correct_argv = (
 666                 ['yt-dlp']
 667                 + [a for i, a in enumerate(argv) if i not in idxs]
 668                 + ['--'] + [argv[i] for i in idxs]
 669             )
 670             self.report_warning(
 671                 'Long argument string detected. '
 672                 'Use -- to separate parameters and URLs, like this:\n%s' %
 673                 args_to_str(correct_argv))
 674
 675     def add_info_extractor(self, ie):
 676         """Add an InfoExtractor object to the end of the list."""
 677         ie_key = ie.ie_key()
 678         self._ies[ie_key] = ie
 679         if not isinstance(ie, type):
 680             self._ies_instances[ie_key] = ie
 681             ie.set_downloader(self)
 682
 683     def _get_info_extractor_class(self, ie_key):
 684         ie = self._ies.get(ie_key)
 685         if ie is None:
 686             ie = get_info_extractor(ie_key)
 687             self.add_info_extractor(ie)
 688         return ie
 689
 690     def get_info_extractor(self, ie_key):
 691         """
 692         Get an instance of an IE with name ie_key, it will try to get one from
 693         the _ies list, if there's no instance it will create a new one and add
 694         it to the extractor list.
 695         """
 696         ie = self._ies_instances.get(ie_key)
 697         if ie is None:
 698             ie = get_info_extractor(ie_key)()
 699             self.add_info_extractor(ie)
 700         return ie
 701
 702     def add_default_info_extractors(self):
 703         """
 704         Add the InfoExtractors returned by gen_extractors to the end of the list
 705         """
 706         for ie in gen_extractor_classes():
 707             self.add_info_extractor(ie)
 708
 709     def add_post_processor(self, pp, when='post_process'):
 710         """Add a PostProcessor object to the end of the chain."""
 711         self._pps[when].append(pp)
 712         pp.set_downloader(self)
 713
 714     def add_post_hook(self, ph):
 715         """Add the post hook"""
 716         self._post_hooks.append(ph)
 717
 718     def add_progress_hook(self, ph):
 719         """Add the download progress hook"""
 720         self._progress_hooks.append(ph)
 721
 722     def add_postprocessor_hook(self, ph):
 723         """Add the postprocessing progress hook"""
 724         self._postprocessor_hooks.append(ph)
 725
 726     def _bidi_workaround(self, message):
 727         if not hasattr(self, '_output_channel'):
 728             return message
 729
 730         assert hasattr(self, '_output_process')
 731         assert isinstance(message, compat_str)
 732         line_count = message.count('\n') + 1
 733         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 734         self._output_process.stdin.flush()
 735         res = ''.join(self._output_channel.readline().decode('utf-8')
 736                       for _ in range(line_count))
 737         return res[:-len('\n')]
 738
 739     def _write_string(self, message, out=None, only_once=False):
 740         if only_once:
 741             if message in self._printed_messages:
 742                 return
 743             self._printed_messages.add(message)
 744         write_string(message, out=out, encoding=self.params.get('encoding'))
 745
 746     def to_stdout(self, message, skip_eol=False, quiet=False):
 747         """Print message to stdout"""
 748         if self.params.get('logger'):
 749             self.params['logger'].debug(message)
 750         elif not quiet or self.params.get('verbose'):
 751             self._write_string(
 752                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 753                 self._err_file if quiet else self._screen_file)
 754
 755     def to_stderr(self, message, only_once=False):
 756         """Print message to stderr"""
 757         assert isinstance(message, compat_str)
 758         if self.params.get('logger'):
 759             self.params['logger'].error(message)
 760         else:
 761             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 762
 763     def to_console_title(self, message):
 764         if not self.params.get('consoletitle', False):
 765             return
 766         if compat_os_name == 'nt':
 767             if ctypes.windll.kernel32.GetConsoleWindow():
 768                 # c_wchar_p() might not be necessary if `message` is
 769                 # already of type unicode()
 770                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 771         elif 'TERM' in os.environ:
 772             self._write_string('\033]0;%s\007' % message, self._screen_file)
 773
 774     def save_console_title(self):
 775         if not self.params.get('consoletitle', False):
 776             return
 777         if self.params.get('simulate'):
 778             return
 779         if compat_os_name != 'nt' and 'TERM' in os.environ:
 780             # Save the title on stack
 781             self._write_string('\033[22;0t', self._screen_file)
 782
 783     def restore_console_title(self):
 784         if not self.params.get('consoletitle', False):
 785             return
 786         if self.params.get('simulate'):
 787             return
 788         if compat_os_name != 'nt' and 'TERM' in os.environ:
 789             # Restore the title from stack
 790             self._write_string('\033[23;0t', self._screen_file)
 791
 792     def __enter__(self):
 793         self.save_console_title()
 794         return self
 795
 796     def __exit__(self, *args):
 797         self.restore_console_title()
 798
 799         if self.params.get('cookiefile') is not None:
 800             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 801
 802     def trouble(self, message=None, tb=None):
 803         """Determine action to take when a download problem appears.
 804
 805         Depending on if the downloader has been configured to ignore
 806         download errors or not, this method may throw an exception or
 807         not when errors are found, after printing the message.
 808
 809         tb, if given, is additional traceback information.
 810         """
 811         if message is not None:
 812             self.to_stderr(message)
 813         if self.params.get('verbose'):
 814             if tb is None:
 815                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 816                     tb = ''
 817                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 818                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 819                     tb += encode_compat_str(traceback.format_exc())
 820                 else:
 821                     tb_data = traceback.format_list(traceback.extract_stack())
 822                     tb = ''.join(tb_data)
 823             if tb:
 824                 self.to_stderr(tb)
 825         if not self.params.get('ignoreerrors'):
 826             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 827                 exc_info = sys.exc_info()[1].exc_info
 828             else:
 829                 exc_info = sys.exc_info()
 830             raise DownloadError(message, exc_info)
 831         self._download_retcode = 1
 832
 833     def to_screen(self, message, skip_eol=False):
 834         """Print message to stdout if not in quiet mode"""
 835         self.to_stdout(
 836             message, skip_eol, quiet=self.params.get('quiet', False))
 837
    class Styles(Enum):
        """Named text styles; a member's value is what gets passed on as the text format"""
        # NOTE: members that share a value are Enum aliases of the first name
        # defined with it (DELIM -> EMPHASIS, WARNING -> HEADERS), so the
        # declaration order matters
        HEADERS = 'yellow'
        EMPHASIS = 'blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'
 845
 846     def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
 847         assert out in ('screen', 'err')
 848         if test_encoding:
 849             original_text = text
 850             handle = self._screen_file if out == 'screen' else self._err_file
 851             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 852             text = text.encode(encoding, 'ignore').decode(encoding)
 853             if fallback is not None and text != original_text:
 854                 text = fallback
 855         if isinstance(f, self.Styles):
 856             f = f._value_
 857         return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback
 858
 859     def _format_screen(self, *args, **kwargs):
 860         return self.__format_text('screen', *args, **kwargs)
 861
 862     def _format_err(self, *args, **kwargs):
 863         return self.__format_text('err', *args, **kwargs)
 864
 865     def report_warning(self, message, only_once=False):
 866         '''
 867         Print the message to stderr, it will be prefixed with 'WARNING:'
 868         If stderr is a tty file the 'WARNING:' will be colored
 869         '''
 870         if self.params.get('logger') is not None:
 871             self.params['logger'].warning(message)
 872         else:
 873             if self.params.get('no_warnings'):
 874                 return
 875             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 876
 877     def report_error(self, message, tb=None):
 878         '''
 879         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 880         in red if stderr is a tty file.
 881         '''
 882         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
 883
 884     def write_debug(self, message, only_once=False):
 885         '''Log debug message or Print message to stderr'''
 886         if not self.params.get('verbose', False):
 887             return
 888         message = '[debug] %s' % message
 889         if self.params.get('logger'):
 890             self.params['logger'].debug(message)
 891         else:
 892             self.to_stderr(message, only_once)
 893
 894     def report_file_already_downloaded(self, file_name):
 895         """Report file has already been fully downloaded."""
 896         try:
 897             self.to_screen('[download] %s has already been downloaded' % file_name)
 898         except UnicodeEncodeError:
 899             self.to_screen('[download] The file has already been downloaded')
 900
 901     def report_file_delete(self, file_name):
 902         """Report that existing file will be deleted."""
 903         try:
 904             self.to_screen('Deleting existing file %s' % file_name)
 905         except UnicodeEncodeError:
 906             self.to_screen('Deleting existing file')
 907
 908     def raise_no_formats(self, info, forced=False):
 909         has_drm = info.get('__has_drm')
 910         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 911         expected = self.params.get('ignore_no_formats_error')
 912         if forced or not expected:
 913             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 914                                  expected=has_drm or expected)
 915         else:
 916             self.report_warning(msg)
 917
 918     def parse_outtmpl(self):
 919         outtmpl_dict = self.params.get('outtmpl', {})
 920         if not isinstance(outtmpl_dict, dict):
 921             outtmpl_dict = {'default': outtmpl_dict}
 922         # Remove spaces in the default template
 923         if self.params.get('restrictfilenames'):
 924             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 925         else:
 926             sanitize = lambda x: x
 927         outtmpl_dict.update({
 928             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 929             if outtmpl_dict.get(k) is None})
 930         for key, val in outtmpl_dict.items():
 931             if isinstance(val, bytes):
 932                 self.report_warning(
 933                     'Parameter outtmpl is bytes, but should be a unicode string. '
 934                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 935         return outtmpl_dict
 936
 937     def get_output_path(self, dir_type='', filename=None):
 938         paths = self.params.get('paths', {})
 939         assert isinstance(paths, dict)
 940         path = os.path.join(
 941             expand_path(paths.get('home', '').strip()),
 942             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 943             filename or '')
 944
 945         # Temporary fix for #4787
 946         # 'Treat' all problem characters by passing filename through preferredencoding
 947         # to workaround encoding issues with subprocess on python2 @ Windows
 948         if sys.version_info < (3, 0) and sys.platform == 'win32':
 949             path = encodeFilename(path, True).decode(preferredencoding())
 950         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 951
 952     @staticmethod
 953     def _outtmpl_expandpath(outtmpl):
 954         # expand_path translates '%%' into '%' and '$$' into '$'
 955         # correspondingly that is not what we want since we need to keep
 956         # '%%' intact for template dict substitution step. Working around
 957         # with boundary-alike separator hack.
 958         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 959         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 960
 961         # outtmpl should be expand_path'ed before template dict substitution
 962         # because meta fields may contain env variables we don't want to
 963         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 964         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 965         return expand_path(outtmpl).replace(sep, '')
 966
 967     @staticmethod
 968     def escape_outtmpl(outtmpl):
 969         ''' Escape any remaining strings like %s, %abc% etc. '''
 970         return re.sub(
 971             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 972             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 973             outtmpl)
 974
 975     @classmethod
 976     def validate_outtmpl(cls, outtmpl):
 977         ''' @return None or Exception object '''
 978         outtmpl = re.sub(
 979             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
 980             lambda mobj: f'{mobj.group(0)[:-1]}s',
 981             cls._outtmpl_expandpath(outtmpl))
 982         try:
 983             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
 984             return None
 985         except ValueError as err:
 986             return err
 987
 988     @staticmethod
 989     def _copy_infodict(info_dict):
 990         info_dict = dict(info_dict)
 991         for key in ('__original_infodict', '__postprocessors'):
 992             info_dict.pop(key, None)
 993         return info_dict
 994
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        @param outtmpl    the output template to rewrite
        @param info_dict  the info dict; an 'epoch' key is added to it if missing,
                          all other changes are made on a copy
        @param sanitize   optional callable invoked as sanitize(field_name, value) on
                          values of the c, s and r conversions; when given, the
                          duration_string delimiter is also '-' instead of ':'
        @return           tuple (rewritten template, dict of the values it refers to)
        """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # From here on only a copy (without the internal keys) is modified
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Collects the computed value of every key that appears in the rewritten template
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the number pattern below is unescaped, so it matches
        # any character rather than only a decimal point; the alternation is also not
        # wrapped in a group before being embedded into INTERNAL_FORMAT_RE
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Parses the part between the parentheses of %(...)X:
        # [-]fields[maths][>strf_format][,alternate][|default]
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Look up a dotted path in info_dict; a leading '.' (empty first key) is dropped
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Compute the value for one parsed key (a groupdict of INTERNAL_FORMAT_RE)
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                # NOTE: this local name shadows the imported `operator` module within get_value
                operator = None
                # Consume offset_key piece by piece, alternating between an operator and an operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a number or the name of another field
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when neither a value nor an explicit default is available
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            # json.dumps fallback: sets and LazyLists are serialized as lists
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            # Replacement callback for EXTERNAL_FORMAT_RE: computes the value of one
            # template field, stores it in TMPL_DICT and returns the rewritten field
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            # Try the field and then each ',alternate' in turn until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            # The same format, but with the conversion type replaced by 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                opts = outer_mobj.group('conversion') or ''
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # The TMPL_DICT key combines the original key and format, separated by '\0'
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1147
1148     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1149         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1150         return self.escape_outtmpl(outtmpl) % info_dict
1151
1152     def _prepare_filename(self, info_dict, tmpl_type='default'):
1153         try:
1154             sanitize = lambda k, v: sanitize_filename(
1155                 compat_str(v),
1156                 restricted=self.params.get('restrictfilenames'),
1157                 is_id=(k == 'id' or k.endswith('_id')))
1158             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1159             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1160
1161             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1162             if filename and force_ext is not None:
1163                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1164
1165             # https://github.com/blackjack4494/youtube-dlc/issues/85
1166             trim_file_name = self.params.get('trim_file_name', False)
1167             if trim_file_name:
1168                 fn_groups = filename.rsplit('.')
1169                 ext = fn_groups[-1]
1170                 sub_ext = ''
1171                 if len(fn_groups) > 2:
1172                     sub_ext = fn_groups[-2]
1173                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1174
1175             return filename
1176         except ValueError as err:
1177             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1178             return None
1179
1180     def prepare_filename(self, info_dict, dir_type='', warn=False):
1181         """Generate the output filename."""
1182
1183         filename = self._prepare_filename(info_dict, dir_type or 'default')
1184         if not filename and dir_type not in ('', 'temp'):
1185             return ''
1186
1187         if warn:
1188             if not self.params.get('paths'):
1189                 pass
1190             elif filename == '-':
1191                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1192             elif os.path.isabs(filename):
1193                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1194         if filename == '-' or not filename:
1195             return filename
1196
1197         return self.get_output_path(dir_type, filename)
1198
1199     def _match_entry(self, info_dict, incomplete=False, silent=False):
1200         """ Returns None if the file should be downloaded """
1201
1202         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1203
1204         def check_filter():
1205             if 'title' in info_dict:
1206                 # This can happen when we're just evaluating the playlist
1207                 title = info_dict['title']
1208                 matchtitle = self.params.get('matchtitle', False)
1209                 if matchtitle:
1210                     if not re.search(matchtitle, title, re.IGNORECASE):
1211                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1212                 rejecttitle = self.params.get('rejecttitle', False)
1213                 if rejecttitle:
1214                     if re.search(rejecttitle, title, re.IGNORECASE):
1215                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1216             date = info_dict.get('upload_date')
1217             if date is not None:
1218                 dateRange = self.params.get('daterange', DateRange())
1219                 if date not in dateRange:
1220                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1221             view_count = info_dict.get('view_count')
1222             if view_count is not None:
1223                 min_views = self.params.get('min_views')
1224                 if min_views is not None and view_count < min_views:
1225                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1226                 max_views = self.params.get('max_views')
1227                 if max_views is not None and view_count > max_views:
1228                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1229             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1230                 return 'Skipping "%s" because it is age restricted' % video_title
1231
1232             match_filter = self.params.get('match_filter')
1233             if match_filter is not None:
1234                 try:
1235                     ret = match_filter(info_dict, incomplete=incomplete)
1236                 except TypeError:
1237                     # For backward compatibility
1238                     ret = None if incomplete else match_filter(info_dict)
1239                 if ret is not None:
1240                     return ret
1241             return None
1242
1243         if self.in_download_archive(info_dict):
1244             reason = '%s has already been recorded in the archive' % video_title
1245             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1246         else:
1247             reason = check_filter()
1248             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1249         if reason is not None:
1250             if not silent:
1251                 self.to_screen('[download] ' + reason)
1252             if self.params.get(break_opt, False):
1253                 raise break_err()
1254         return reason
1255
1256     @staticmethod
1257     def add_extra_info(info_dict, extra_info):
1258         '''Set the keys from extra_info in info dict if they are missing'''
1259         for key, value in extra_info.items():
1260             info_dict.setdefault(key, value)
1261
1262     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1263                      process=True, force_generic_extractor=False):
1264         """
1265         Return a list with a dictionary for each video extracted.
1266
1267         Arguments:
1268         url -- URL to extract
1269
1270         Keyword arguments:
1271         download -- whether to download videos during extraction
1272         ie_key -- extractor key hint
1273         extra_info -- dictionary containing the extra values to add to each result
1274         process -- whether to resolve all unresolved references (URLs, playlist items),
1275             must be True for download to work.
1276         force_generic_extractor -- force using the generic extractor
1277         """
1278
1279         if extra_info is None:
1280             extra_info = {}
1281
1282         if not ie_key and force_generic_extractor:
1283             ie_key = 'Generic'
1284
1285         if ie_key:
1286             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1287         else:
1288             ies = self._ies
1289
1290         for ie_key, ie in ies.items():
1291             if not ie.suitable(url):
1292                 continue
1293
1294             if not ie.working():
1295                 self.report_warning('The program functionality for this site has been marked as broken, '
1296                                     'and will probably not work.')
1297
1298             temp_id = ie.get_temp_id(url)
1299             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1300                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1301                                ie_key, temp_id))
1302                 break
1303             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1304         else:
1305             self.report_error('no suitable InfoExtractor for URL %s' % url)
1306
1307     def __handle_extraction_exceptions(func):
1308         @functools.wraps(func)
1309         def wrapper(self, *args, **kwargs):
1310             try:
1311                 return func(self, *args, **kwargs)
1312             except GeoRestrictedError as e:
1313                 msg = e.msg
1314                 if e.countries:
1315                     msg += '\nThis video is available in %s.' % ', '.join(
1316                         map(ISO3166Utils.short2full, e.countries))
1317                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1318                 self.report_error(msg)
1319             except ExtractorError as e:  # An error we somewhat expected
1320                 self.report_error(compat_str(e), e.format_traceback())
1321             except ThrottledDownload:
1322                 self.to_stderr('\r')
1323                 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1324                 return wrapper(self, *args, **kwargs)
1325             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
1326                 raise
1327             except Exception as e:
1328                 if self.params.get('ignoreerrors'):
1329                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1330                 else:
1331                     raise
1332         return wrapper
1333
1334     @__handle_extraction_exceptions
1335     def __extract_info(self, url, ie, download, extra_info, process):
1336         ie_result = ie.extract(url)
1337         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1338             return
1339         if isinstance(ie_result, list):
1340             # Backwards compatibility: old IE result format
1341             ie_result = {
1342                 '_type': 'compat_list',
1343                 'entries': ie_result,
1344             }
1345         if extra_info.get('original_url'):
1346             ie_result.setdefault('original_url', extra_info['original_url'])
1347         self.add_default_extra_info(ie_result, ie, url)
1348         if process:
1349             return self.process_ie_result(ie_result, download, extra_info)
1350         else:
1351             return ie_result
1352
1353     def add_default_extra_info(self, ie_result, ie, url):
1354         if url is not None:
1355             self.add_extra_info(ie_result, {
1356                 'webpage_url': url,
1357                 'original_url': url,
1358                 'webpage_url_basename': url_basename(url),
1359             })
1360         if ie is not None:
1361             self.add_extra_info(ie_result, {
1362                 'extractor': ie.IE_NAME,
1363                 'extractor_key': ie.ie_key(),
1364             })
1365
1366     def process_ie_result(self, ie_result, download=True, extra_info=None):
1367         """
1368         Take the result of the ie(may be modified) and resolve all unresolved
1369         references (URLs, playlist items).
1370
1371         It will also download the videos if 'download'.
1372         Returns the resolved ie_result.
1373         """
1374         if extra_info is None:
1375             extra_info = {}
1376         result_type = ie_result.get('_type', 'video')
1377
1378         if result_type in ('url', 'url_transparent'):
1379             ie_result['url'] = sanitize_url(ie_result['url'])
1380             if ie_result.get('original_url'):
1381                 extra_info.setdefault('original_url', ie_result['original_url'])
1382
1383             extract_flat = self.params.get('extract_flat', False)
1384             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1385                     or extract_flat is True):
1386                 info_copy = ie_result.copy()
1387                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1388                 if ie and not ie_result.get('id'):
1389                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1390                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1391                 self.add_extra_info(info_copy, extra_info)
1392                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1393                 if self.params.get('force_write_download_archive', False):
1394                     self.record_download_archive(info_copy)
1395                 return ie_result
1396
1397         if result_type == 'video':
1398             self.add_extra_info(ie_result, extra_info)
1399             ie_result = self.process_video_result(ie_result, download=download)
1400             additional_urls = (ie_result or {}).get('additional_urls')
1401             if additional_urls:
1402                 # TODO: Improve MetadataParserPP to allow setting a list
1403                 if isinstance(additional_urls, compat_str):
1404                     additional_urls = [additional_urls]
1405                 self.to_screen(
1406                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1407                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1408                 ie_result['additional_entries'] = [
1409                     self.extract_info(
1410                         url, download, extra_info,
1411                         force_generic_extractor=self.params.get('force_generic_extractor'))
1412                     for url in additional_urls
1413                 ]
1414             return ie_result
1415         elif result_type == 'url':
1416             # We have to add extra_info to the results because it may be
1417             # contained in a playlist
1418             return self.extract_info(
1419                 ie_result['url'], download,
1420                 ie_key=ie_result.get('ie_key'),
1421                 extra_info=extra_info)
1422         elif result_type == 'url_transparent':
1423             # Use the information from the embedding page
1424             info = self.extract_info(
1425                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1426                 extra_info=extra_info, download=False, process=False)
1427
1428             # extract_info may return None when ignoreerrors is enabled and
1429             # extraction failed with an error, don't crash and return early
1430             # in this case
1431             if not info:
1432                 return info
1433
1434             force_properties = dict(
1435                 (k, v) for k, v in ie_result.items() if v is not None)
1436             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1437                 if f in force_properties:
1438                     del force_properties[f]
1439             new_result = info.copy()
1440             new_result.update(force_properties)
1441
1442             # Extracted info may not be a video result (i.e.
1443             # info.get('_type', 'video') != video) but rather an url or
1444             # url_transparent. In such cases outer metadata (from ie_result)
1445             # should be propagated to inner one (info). For this to happen
1446             # _type of info should be overridden with url_transparent. This
1447             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1448             if new_result.get('_type') == 'url':
1449                 new_result['_type'] = 'url_transparent'
1450
1451             return self.process_ie_result(
1452                 new_result, download=download, extra_info=extra_info)
1453         elif result_type in ('playlist', 'multi_video'):
1454             # Protect from infinite recursion due to recursively nested playlists
1455             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1456             webpage_url = ie_result['webpage_url']
1457             if webpage_url in self._playlist_urls:
1458                 self.to_screen(
1459                     '[download] Skipping already downloaded playlist: %s'
1460                     % ie_result.get('title') or ie_result.get('id'))
1461                 return
1462
1463             self._playlist_level += 1
1464             self._playlist_urls.add(webpage_url)
1465             self._sanitize_thumbnails(ie_result)
1466             try:
1467                 return self.__process_playlist(ie_result, download)
1468             finally:
1469                 self._playlist_level -= 1
1470                 if not self._playlist_level:
1471                     self._playlist_urls.clear()
1472         elif result_type == 'compat_list':
1473             self.report_warning(
1474                 'Extractor %s returned a compat_list result. '
1475                 'It needs to be updated.' % ie_result.get('extractor'))
1476
1477             def _fixup(r):
1478                 self.add_extra_info(r, {
1479                     'extractor': ie_result['extractor'],
1480                     'webpage_url': ie_result['webpage_url'],
1481                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1482                     'extractor_key': ie_result['extractor_key'],
1483                 })
1484                 return r
1485             ie_result['entries'] = [
1486                 self.process_ie_result(_fixup(r), download, extra_info)
1487                 for r in ie_result['entries']
1488             ]
1489             return ie_result
1490         else:
1491             raise Exception('Invalid result type: %s' % result_type)
1492
1493     def _ensure_dir_exists(self, path):
1494         return make_dir(path, self.report_error)
1495
1496     def __process_playlist(self, ie_result, download):
1497         # We process each entry in the playlist
1498         playlist = ie_result.get('title') or ie_result.get('id')
1499         self.to_screen('[download] Downloading playlist: %s' % playlist)
1500
1501         if 'entries' not in ie_result:
1502             raise EntryNotInPlaylist()
1503         incomplete_entries = bool(ie_result.get('requested_entries'))
1504         if incomplete_entries:
1505             def fill_missing_entries(entries, indexes):
1506                 ret = [None] * max(*indexes)
1507                 for i, entry in zip(indexes, entries):
1508                     ret[i - 1] = entry
1509                 return ret
1510             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1511
1512         playlist_results = []
1513
1514         playliststart = self.params.get('playliststart', 1)
1515         playlistend = self.params.get('playlistend')
1516         # For backwards compatibility, interpret -1 as whole list
1517         if playlistend == -1:
1518             playlistend = None
1519
1520         playlistitems_str = self.params.get('playlist_items')
1521         playlistitems = None
1522         if playlistitems_str is not None:
1523             def iter_playlistitems(format):
1524                 for string_segment in format.split(','):
1525                     if '-' in string_segment:
1526                         start, end = string_segment.split('-')
1527                         for item in range(int(start), int(end) + 1):
1528                             yield int(item)
1529                     else:
1530                         yield int(string_segment)
1531             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1532
1533         ie_entries = ie_result['entries']
1534         msg = (
1535             'Downloading %d videos' if not isinstance(ie_entries, list)
1536             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1537
1538         if isinstance(ie_entries, list):
1539             def get_entry(i):
1540                 return ie_entries[i - 1]
1541         else:
1542             if not isinstance(ie_entries, PagedList):
1543                 ie_entries = LazyList(ie_entries)
1544
1545             def get_entry(i):
1546                 return YoutubeDL.__handle_extraction_exceptions(
1547                     lambda self, i: ie_entries[i - 1]
1548                 )(self, i)
1549
1550         entries = []
1551         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1552         for i in items:
1553             if i == 0:
1554                 continue
1555             if playlistitems is None and playlistend is not None and playlistend < i:
1556                 break
1557             entry = None
1558             try:
1559                 entry = get_entry(i)
1560                 if entry is None:
1561                     raise EntryNotInPlaylist()
1562             except (IndexError, EntryNotInPlaylist):
1563                 if incomplete_entries:
1564                     raise EntryNotInPlaylist()
1565                 elif not playlistitems:
1566                     break
1567             entries.append(entry)
1568             try:
1569                 if entry is not None:
1570                     self._match_entry(entry, incomplete=True, silent=True)
1571             except (ExistingVideoReached, RejectedVideoReached):
1572                 break
1573         ie_result['entries'] = entries
1574
1575         # Save playlist_index before re-ordering
1576         entries = [
1577             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1578             for i, entry in enumerate(entries, 1)
1579             if entry is not None]
1580         n_entries = len(entries)
1581
1582         if not playlistitems and (playliststart or playlistend):
1583             playlistitems = list(range(playliststart, playliststart + n_entries))
1584         ie_result['requested_entries'] = playlistitems
1585
1586         if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
1587             ie_copy = {
1588                 'playlist': playlist,
1589                 'playlist_id': ie_result.get('id'),
1590                 'playlist_title': ie_result.get('title'),
1591                 'playlist_uploader': ie_result.get('uploader'),
1592                 'playlist_uploader_id': ie_result.get('uploader_id'),
1593                 'playlist_index': 0,
1594                 'n_entries': n_entries,
1595             }
1596             ie_copy.update(dict(ie_result))
1597
1598             if self._write_info_json('playlist', ie_result,
1599                                      self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1600                 return
1601             if self._write_description('playlist', ie_result,
1602                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1603                 return
1604             # TODO: This should be passed to ThumbnailsConvertor if necessary
1605             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1606
1607         if self.params.get('playlistreverse', False):
1608             entries = entries[::-1]
1609         if self.params.get('playlistrandom', False):
1610             random.shuffle(entries)
1611
1612         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1613
1614         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1615         failures = 0
1616         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1617         for i, entry_tuple in enumerate(entries, 1):
1618             playlist_index, entry = entry_tuple
1619             if 'playlist-index' in self.params.get('compat_opts', []):
1620                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1621             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1622             # This __x_forwarded_for_ip thing is a bit ugly but requires
1623             # minimal changes
1624             if x_forwarded_for:
1625                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1626             extra = {
1627                 'n_entries': n_entries,
1628                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1629                 'playlist_index': playlist_index,
1630                 'playlist_autonumber': i,
1631                 'playlist': playlist,
1632                 'playlist_id': ie_result.get('id'),
1633                 'playlist_title': ie_result.get('title'),
1634                 'playlist_uploader': ie_result.get('uploader'),
1635                 'playlist_uploader_id': ie_result.get('uploader_id'),
1636                 'extractor': ie_result['extractor'],
1637                 'webpage_url': ie_result['webpage_url'],
1638                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1639                 'extractor_key': ie_result['extractor_key'],
1640             }
1641
1642             if self._match_entry(entry, incomplete=True) is not None:
1643                 continue
1644
1645             entry_result = self.__process_iterable_entry(entry, download, extra)
1646             if not entry_result:
1647                 failures += 1
1648             if failures >= max_failures:
1649                 self.report_error(
1650                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1651                 break
1652             # TODO: skip failed (empty) entries?
1653             playlist_results.append(entry_result)
1654         ie_result['entries'] = playlist_results
1655         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1656         return ie_result
1657
1658     @__handle_extraction_exceptions
1659     def __process_iterable_entry(self, entry, download, extra_info):
1660         return self.process_ie_result(
1661             entry, download=download, extra_info=extra_info)
1662
1663     def _build_format_filter(self, filter_spec):
1664         " Returns a function to filter the formats according to the filter_spec "
1665
1666         OPERATORS = {
1667             '<': operator.lt,
1668             '<=': operator.le,
1669             '>': operator.gt,
1670             '>=': operator.ge,
1671             '=': operator.eq,
1672             '!=': operator.ne,
1673         }
1674         operator_rex = re.compile(r'''(?x)\s*
1675             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1676             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1677             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1678             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1679         m = operator_rex.fullmatch(filter_spec)
1680         if m:
1681             try:
1682                 comparison_value = int(m.group('value'))
1683             except ValueError:
1684                 comparison_value = parse_filesize(m.group('value'))
1685                 if comparison_value is None:
1686                     comparison_value = parse_filesize(m.group('value') + 'B')
1687                 if comparison_value is None:
1688                     raise ValueError(
1689                         'Invalid value %r in format specification %r' % (
1690                             m.group('value'), filter_spec))
1691             op = OPERATORS[m.group('op')]
1692
1693         if not m:
1694             STR_OPERATORS = {
1695                 '=': operator.eq,
1696                 '^=': lambda attr, value: attr.startswith(value),
1697                 '$=': lambda attr, value: attr.endswith(value),
1698                 '*=': lambda attr, value: value in attr,
1699             }
1700             str_operator_rex = re.compile(r'''(?x)\s*
1701                 (?P<key>[a-zA-Z0-9._-]+)\s*
1702                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1703                 (?P<value>[a-zA-Z0-9._-]+)\s*
1704                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1705             m = str_operator_rex.fullmatch(filter_spec)
1706             if m:
1707                 comparison_value = m.group('value')
1708                 str_op = STR_OPERATORS[m.group('op')]
1709                 if m.group('negation'):
1710                     op = lambda attr, value: not str_op(attr, value)
1711                 else:
1712                     op = str_op
1713
1714         if not m:
1715             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1716
1717         def _filter(f):
1718             actual_value = f.get(m.group('key'))
1719             if actual_value is None:
1720                 return m.group('none_inclusive')
1721             return op(actual_value, comparison_value)
1722         return _filter
1723
1724     def _check_formats(self, formats):
1725         for f in formats:
1726             self.to_screen('[info] Testing format %s' % f['format_id'])
1727             temp_file = tempfile.NamedTemporaryFile(
1728                 suffix='.tmp', delete=False,
1729                 dir=self.get_output_path('temp') or None)
1730             temp_file.close()
1731             try:
1732                 success, _ = self.dl(temp_file.name, f, test=True)
1733             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1734                 success = False
1735             finally:
1736                 if os.path.exists(temp_file.name):
1737                     try:
1738                         os.remove(temp_file.name)
1739                     except OSError:
1740                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1741             if success:
1742                 yield f
1743             else:
1744                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1745
1746     def _default_format_spec(self, info_dict, download=True):
1747
1748         def can_merge():
1749             merger = FFmpegMergerPP(self)
1750             return merger.available and merger.can_merge()
1751
1752         prefer_best = (
1753             not self.params.get('simulate')
1754             and download
1755             and (
1756                 not can_merge()
1757                 or info_dict.get('is_live', False)
1758                 or self.outtmpl_dict['default'] == '-'))
1759         compat = (
1760             prefer_best
1761             or self.params.get('allow_multiple_audio_streams', False)
1762             or 'format-spec' in self.params.get('compat_opts', []))
1763
1764         return (
1765             'best/bestvideo+bestaudio' if prefer_best
1766             else 'bestvideo*+bestaudio/best' if not compat
1767             else 'bestvideo+bestaudio/best')
1768
1769     def build_format_selector(self, format_spec):
1770         def syntax_error(note, start):
1771             message = (
1772                 'Invalid format specification: '
1773                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1774             return SyntaxError(message)
1775
1776         PICKFIRST = 'PICKFIRST'
1777         MERGE = 'MERGE'
1778         SINGLE = 'SINGLE'
1779         GROUP = 'GROUP'
1780         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1781
1782         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1783                                   'video': self.params.get('allow_multiple_video_streams', False)}
1784
1785         check_formats = self.params.get('check_formats') == 'selected'
1786
1787         def _parse_filter(tokens):
1788             filter_parts = []
1789             for type, string, start, _, _ in tokens:
1790                 if type == tokenize.OP and string == ']':
1791                     return ''.join(filter_parts)
1792                 else:
1793                     filter_parts.append(string)
1794
1795         def _remove_unused_ops(tokens):
1796             # Remove operators that we don't use and join them with the surrounding strings
1797             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1798             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1799             last_string, last_start, last_end, last_line = None, None, None, None
1800             for type, string, start, end, line in tokens:
1801                 if type == tokenize.OP and string == '[':
1802                     if last_string:
1803                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1804                         last_string = None
1805                     yield type, string, start, end, line
1806                     # everything inside brackets will be handled by _parse_filter
1807                     for type, string, start, end, line in tokens:
1808                         yield type, string, start, end, line
1809                         if type == tokenize.OP and string == ']':
1810                             break
1811                 elif type == tokenize.OP and string in ALLOWED_OPS:
1812                     if last_string:
1813                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1814                         last_string = None
1815                     yield type, string, start, end, line
1816                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1817                     if not last_string:
1818                         last_string = string
1819                         last_start = start
1820                         last_end = end
1821                     else:
1822                         last_string += string
1823             if last_string:
1824                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1825
1826         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1827             selectors = []
1828             current_selector = None
1829             for type, string, start, _, _ in tokens:
1830                 # ENCODING is only defined in python 3.x
1831                 if type == getattr(tokenize, 'ENCODING', None):
1832                     continue
1833                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1834                     current_selector = FormatSelector(SINGLE, string, [])
1835                 elif type == tokenize.OP:
1836                     if string == ')':
1837                         if not inside_group:
1838                             # ')' will be handled by the parentheses group
1839                             tokens.restore_last_token()
1840                         break
1841                     elif inside_merge and string in ['/', ',']:
1842                         tokens.restore_last_token()
1843                         break
1844                     elif inside_choice and string == ',':
1845                         tokens.restore_last_token()
1846                         break
1847                     elif string == ',':
1848                         if not current_selector:
1849                             raise syntax_error('"," must follow a format selector', start)
1850                         selectors.append(current_selector)
1851                         current_selector = None
1852                     elif string == '/':
1853                         if not current_selector:
1854                             raise syntax_error('"/" must follow a format selector', start)
1855                         first_choice = current_selector
1856                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1857                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1858                     elif string == '[':
1859                         if not current_selector:
1860                             current_selector = FormatSelector(SINGLE, 'best', [])
1861                         format_filter = _parse_filter(tokens)
1862                         current_selector.filters.append(format_filter)
1863                     elif string == '(':
1864                         if current_selector:
1865                             raise syntax_error('Unexpected "("', start)
1866                         group = _parse_format_selection(tokens, inside_group=True)
1867                         current_selector = FormatSelector(GROUP, group, [])
1868                     elif string == '+':
1869                         if not current_selector:
1870                             raise syntax_error('Unexpected "+"', start)
1871                         selector_1 = current_selector
1872                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1873                         if not selector_2:
1874                             raise syntax_error('Expected a selector', start)
1875                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1876                     else:
1877                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1878                 elif type == tokenize.ENDMARKER:
1879                     break
1880             if current_selector:
1881                 selectors.append(current_selector)
1882             return selectors
1883
1884         def _merge(formats_pair):
1885             format_1, format_2 = formats_pair
1886
1887             formats_info = []
1888             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1889             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1890
1891             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1892                 get_no_more = {'video': False, 'audio': False}
1893                 for (i, fmt_info) in enumerate(formats_info):
1894                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1895                         formats_info.pop(i)
1896                         continue
1897                     for aud_vid in ['audio', 'video']:
1898                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1899                             if get_no_more[aud_vid]:
1900                                 formats_info.pop(i)
1901                                 break
1902                             get_no_more[aud_vid] = True
1903
1904             if len(formats_info) == 1:
1905                 return formats_info[0]
1906
1907             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1908             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1909
1910             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1911             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1912
1913             output_ext = self.params.get('merge_output_format')
1914             if not output_ext:
1915                 if the_only_video:
1916                     output_ext = the_only_video['ext']
1917                 elif the_only_audio and not video_fmts:
1918                     output_ext = the_only_audio['ext']
1919                 else:
1920                     output_ext = 'mkv'
1921
1922             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1923
1924             new_dict = {
1925                 'requested_formats': formats_info,
1926                 'format': '+'.join(filtered('format')),
1927                 'format_id': '+'.join(filtered('format_id')),
1928                 'ext': output_ext,
1929                 'protocol': '+'.join(map(determine_protocol, formats_info)),
1930                 'language': '+'.join(orderedSet(filtered('language'))),
1931                 'format_note': '+'.join(orderedSet(filtered('format_note'))),
1932                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
1933                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
1934             }
1935
1936             if the_only_video:
1937                 new_dict.update({
1938                     'width': the_only_video.get('width'),
1939                     'height': the_only_video.get('height'),
1940                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1941                     'fps': the_only_video.get('fps'),
1942                     'dynamic_range': the_only_video.get('dynamic_range'),
1943                     'vcodec': the_only_video.get('vcodec'),
1944                     'vbr': the_only_video.get('vbr'),
1945                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1946                 })
1947
1948             if the_only_audio:
1949                 new_dict.update({
1950                     'acodec': the_only_audio.get('acodec'),
1951                     'abr': the_only_audio.get('abr'),
1952                     'asr': the_only_audio.get('asr'),
1953                 })
1954
1955             return new_dict
1956
1957         def _check_formats(formats):
1958             if not check_formats:
1959                 yield from formats
1960                 return
1961             yield from self._check_formats(formats)
1962
        def _build_selector_function(selector):
            """Turn a parsed selector into a callable that picks formats.

            ``selector`` is either a list of selectors or a single selector
            object exposing ``type``, ``selector`` and ``filters``.  The
            returned function takes a ``ctx`` dict (reads ``ctx['formats']``
            and, for best/worst atoms, ``ctx['incomplete_formats']``) and
            returns an iterable of the chosen format dicts.
            """
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    # Concatenate the results of every selector in the list
                    for f in fs:
                        yield from f(ctx)
                # A plain list carries no filters, so return without wrapping
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # Return the results of the first alternative that yields anything
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Merge every combination of results from both sides; each
                    # side works on its own deep copy of ctx
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        # Fold all formats into one merge, starting from the last one
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # best/worst (or b/w), optional video/audio type, optional
                    # "*" modifier and optional ".N" index
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        # Additionally reject formats that have neither video nor audio
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not a best/worst spec: treat it as a known extension or a format id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        # Walk the list from the end when "best" was requested
                        # (presumably formats are ordered worst to best - confirm
                        # against the sorting done by the caller); checking is lazy
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            # Fewer matching formats than the requested index
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply this selector's filters on a deep copy so the ctx
                # passed in by the caller is left untouched
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
2065
2066         stream = io.BytesIO(format_spec.encode('utf-8'))
2067         try:
2068             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2069         except tokenize.TokenError:
2070             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2071
2072         class TokenIterator(object):
2073             def __init__(self, tokens):
2074                 self.tokens = tokens
2075                 self.counter = 0
2076
2077             def __iter__(self):
2078                 return self
2079
2080             def __next__(self):
2081                 if self.counter >= len(self.tokens):
2082                     raise StopIteration()
2083                 value = self.tokens[self.counter]
2084                 self.counter += 1
2085                 return value
2086
2087             next = __next__
2088
2089             def restore_last_token(self):
2090                 self.counter -= 1
2091
2092         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2093         return _build_selector_function(parsed_selector)
2094
2095     def _calc_headers(self, info_dict):
2096         res = std_headers.copy()
2097
2098         add_headers = info_dict.get('http_headers')
2099         if add_headers:
2100             res.update(add_headers)
2101
2102         cookies = self._calc_cookies(info_dict)
2103         if cookies:
2104             res['Cookie'] = cookies
2105
2106         if 'X-Forwarded-For' not in res:
2107             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2108             if x_forwarded_for_ip:
2109                 res['X-Forwarded-For'] = x_forwarded_for_ip
2110
2111         return res
2112
2113     def _calc_cookies(self, info_dict):
2114         pr = sanitized_Request(info_dict['url'])
2115         self.cookiejar.add_cookie_header(pr)
2116         return pr.get_header('Cookie')
2117
2118     def _sort_thumbnails(self, thumbnails):
2119         thumbnails.sort(key=lambda t: (
2120             t.get('preference') if t.get('preference') is not None else -1,
2121             t.get('width') if t.get('width') is not None else -1,
2122             t.get('height') if t.get('height') is not None else -1,
2123             t.get('id') if t.get('id') is not None else '',
2124             t.get('url')))
2125
2126     def _sanitize_thumbnails(self, info_dict):
2127         thumbnails = info_dict.get('thumbnails')
2128         if thumbnails is None:
2129             thumbnail = info_dict.get('thumbnail')
2130             if thumbnail:
2131                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2132         if not thumbnails:
2133             return
2134
2135         def check_thumbnails(thumbnails):
2136             for t in thumbnails:
2137                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2138                 try:
2139                     self.urlopen(HEADRequest(t['url']))
2140                 except network_exceptions as err:
2141                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2142                     continue
2143                 yield t
2144
2145         self._sort_thumbnails(thumbnails)
2146         for i, t in enumerate(thumbnails):
2147             if t.get('id') is None:
2148                 t['id'] = '%d' % i
2149             if t.get('width') and t.get('height'):
2150                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2151             t['url'] = sanitize_url(t['url'])
2152
2153         if self.params.get('check_formats') is True:
2154             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse()
2155         else:
2156             info_dict['thumbnails'] = thumbnails
2157
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single-video ``info_dict``, select formats and process them.

        The dict is normalized in place (ids, numeric fields, thumbnails,
        dates, live status, subtitles, formats), the listing options are
        honoured, and then the format selector is run.  When ``download`` is
        true every selected format is handed to ``self.process_info``.

        Returns ``info_dict`` updated with the last selected format, or
        ``None`` when only a listing was requested (a ``list*`` param is set
        and ``simulate`` is ``None``).

        Raises ``ExtractorError`` when ``id`` or ``title`` is missing, or when
        no requested format is available and ``ignore_no_formats_error`` is
        not set.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string value to a string, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every non-numeric value of a known numeric field via int_or_none
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Keep the single 'thumbnail' field in sync: sanitize an explicit one,
        # otherwise fall back to the last entry of the thumbnails list
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive missing YYYYMMDD date fields from their timestamp counterparts
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Infer 'live_status' from is_live/was_live when it is not given, then
        # fill in whichever of those boolean keys is still unset
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and guess a missing extension from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Remember whether any format is DRM protected before possibly dropping them
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            # A format is usable only if it has a non-empty URL
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the list of formats sharing it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    # Disambiguate duplicates by appending their position
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in derived per-format fields that the extractor left unset
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                # Estimate the size from duration and total bitrate
                format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if self.params.get('check_formats') is True:
            # Check lazily starting from the end of the list, then restore the order
            formats = LazyList(self._check_formats(formats[::-1])).reverse()

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        # Listing alone ends processing here unless 'simulate' was set explicitly
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                # Each selected format is processed as its own copy of the info dict
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2435
2436     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2437         """Select the requested subtitles and their format"""
2438         available_subs = {}
2439         if normal_subtitles and self.params.get('writesubtitles'):
2440             available_subs.update(normal_subtitles)
2441         if automatic_captions and self.params.get('writeautomaticsub'):
2442             for lang, cap_info in automatic_captions.items():
2443                 if lang not in available_subs:
2444                     available_subs[lang] = cap_info
2445
2446         if (not self.params.get('writesubtitles') and not
2447                 self.params.get('writeautomaticsub') or not
2448                 available_subs):
2449             return None
2450
2451         all_sub_langs = available_subs.keys()
2452         if self.params.get('allsubtitles', False):
2453             requested_langs = all_sub_langs
2454         elif self.params.get('subtitleslangs', False):
2455             # A list is used so that the order of languages will be the same as
2456             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2457             requested_langs = []
2458             for lang_re in self.params.get('subtitleslangs'):
2459                 if lang_re == 'all':
2460                     requested_langs.extend(all_sub_langs)
2461                     continue
2462                 discard = lang_re[0] == '-'
2463                 if discard:
2464                     lang_re = lang_re[1:]
2465                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2466                 if discard:
2467                     for lang in current_langs:
2468                         while lang in requested_langs:
2469                             requested_langs.remove(lang)
2470                 else:
2471                     requested_langs.extend(current_langs)
2472             requested_langs = orderedSet(requested_langs)
2473         elif 'en' in available_subs:
2474             requested_langs = ['en']
2475         else:
2476             requested_langs = [list(all_sub_langs)[0]]
2477         if requested_langs:
2478             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2479
2480         formats_query = self.params.get('subtitlesformat', 'best')
2481         formats_preference = formats_query.split('/') if formats_query else []
2482         subs = {}
2483         for lang in requested_langs:
2484             formats = available_subs.get(lang)
2485             if formats is None:
2486                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2487                 continue
2488             for ext in formats_preference:
2489                 if ext == 'best':
2490                     f = formats[-1]
2491                     break
2492                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2493                 if matches:
2494                     f = matches[-1]
2495                     break
2496             else:
2497                 f = formats[-1]
2498                 self.report_warning(
2499                     'No subtitle format found matching "%s" for language %s, '
2500                     'using %s' % (formats_query, lang, f['ext']))
2501             subs[lang] = f
2502         return subs
2503
2504     def __forced_printings(self, info_dict, filename, incomplete):
2505         def print_mandatory(field, actual_field=None):
2506             if actual_field is None:
2507                 actual_field = field
2508             if (self.params.get('force%s' % field, False)
2509                     and (not incomplete or info_dict.get(actual_field) is not None)):
2510                 self.to_stdout(info_dict[actual_field])
2511
2512         def print_optional(field):
2513             if (self.params.get('force%s' % field, False)
2514                     and info_dict.get(field) is not None):
2515                 self.to_stdout(info_dict[field])
2516
2517         info_dict = info_dict.copy()
2518         if filename is not None:
2519             info_dict['filename'] = filename
2520         if info_dict.get('requested_formats') is not None:
2521             # For RTMP URLs, also include the playpath
2522             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2523         elif 'url' in info_dict:
2524             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2525
2526         if self.params.get('forceprint') or self.params.get('forcejson'):
2527             self.post_extract(info_dict)
2528         for tmpl in self.params.get('forceprint', []):
2529             mobj = re.match(r'\w+(=?)$', tmpl)
2530             if mobj and mobj.group(1):
2531                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2532             elif mobj:
2533                 tmpl = '%({})s'.format(tmpl)
2534             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2535
2536         print_mandatory('title')
2537         print_mandatory('id')
2538         print_mandatory('url', 'urls')
2539         print_optional('thumbnail')
2540         print_optional('description')
2541         print_optional('filename')
2542         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2543             self.to_stdout(formatSeconds(info_dict['duration']))
2544         print_mandatory('format')
2545
2546         if self.params.get('forcejson'):
2547             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2548
2549     def dl(self, name, info, subtitle=False, test=False):
2550         if not info.get('url'):
2551             self.raise_no_formats(info, True)
2552
2553         if test:
2554             verbose = self.params.get('verbose')
2555             params = {
2556                 'test': True,
2557                 'quiet': self.params.get('quiet') or not verbose,
2558                 'verbose': verbose,
2559                 'noprogress': not verbose,
2560                 'nopart': True,
2561                 'skip_unavailable_fragments': False,
2562                 'keep_fragments': False,
2563                 'overwrites': True,
2564                 '_no_ytdl_file': True,
2565             }
2566         else:
2567             params = self.params
2568         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2569         if not test:
2570             for ph in self._progress_hooks:
2571                 fd.add_progress_hook(ph)
2572             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2573             self.write_debug('Invoking downloader on "%s"' % urls)
2574
2575         new_info = copy.deepcopy(self._copy_infodict(info))
2576         if new_info.get('http_headers') is None:
2577             new_info['http_headers'] = self._calc_headers(new_info)
2578         return fd.download(name, new_info, subtitle)
2579
2580     def process_info(self, info_dict):
2581         """Process a single resolved IE result."""
2582
2583         assert info_dict.get('_type', 'video') == 'video'
2584
2585         max_downloads = self.params.get('max_downloads')
2586         if max_downloads is not None:
2587             if self._num_downloads >= int(max_downloads):
2588                 raise MaxDownloadsReached()
2589
2590         # TODO: backward compatibility, to be removed
2591         info_dict['fulltitle'] = info_dict['title']
2592
2593         if 'format' not in info_dict and 'ext' in info_dict:
2594             info_dict['format'] = info_dict['ext']
2595
2596         if self._match_entry(info_dict) is not None:
2597             return
2598
2599         self.post_extract(info_dict)
2600         self._num_downloads += 1
2601
2602         # info_dict['_filename'] needs to be set for backward compatibility
2603         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2604         temp_filename = self.prepare_filename(info_dict, 'temp')
2605         files_to_move = {}
2606
2607         # Forced printings
2608         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2609
2610         if self.params.get('simulate'):
2611             if self.params.get('force_write_download_archive', False):
2612                 self.record_download_archive(info_dict)
2613             # Do nothing else if in simulate mode
2614             return
2615
2616         if full_filename is None:
2617             return
2618         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2619             return
2620         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2621             return
2622
2623         if self._write_description('video', info_dict,
2624                                    self.prepare_filename(info_dict, 'description')) is None:
2625             return
2626
2627         sub_files = self._write_subtitles(info_dict, temp_filename)
2628         if sub_files is None:
2629             return
2630         files_to_move.update(dict(sub_files))
2631
2632         thumb_files = self._write_thumbnails(
2633             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2634         if thumb_files is None:
2635             return
2636         files_to_move.update(dict(thumb_files))
2637
2638         infofn = self.prepare_filename(info_dict, 'infojson')
2639         _infojson_written = self._write_info_json('video', info_dict, infofn)
2640         if _infojson_written:
2641             info_dict['__infojson_filename'] = infofn
2642         elif _infojson_written is None:
2643             return
2644
2645         # Note: Annotations are deprecated
2646         annofn = None
2647         if self.params.get('writeannotations', False):
2648             annofn = self.prepare_filename(info_dict, 'annotation')
2649         if annofn:
2650             if not self._ensure_dir_exists(encodeFilename(annofn)):
2651                 return
2652             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2653                 self.to_screen('[info] Video annotations are already present')
2654             elif not info_dict.get('annotations'):
2655                 self.report_warning('There are no annotations to write.')
2656             else:
2657                 try:
2658                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2659                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2660                         annofile.write(info_dict['annotations'])
2661                 except (KeyError, TypeError):
2662                     self.report_warning('There are no annotations to write.')
2663                 except (OSError, IOError):
2664                     self.report_error('Cannot write annotations file: ' + annofn)
2665                     return
2666
2667         # Write internet shortcut files
2668         url_link = webloc_link = desktop_link = False
2669         if self.params.get('writelink', False):
2670             if sys.platform == "darwin":  # macOS.
2671                 webloc_link = True
2672             elif sys.platform.startswith("linux"):
2673                 desktop_link = True
2674             else:  # if sys.platform in ['win32', 'cygwin']:
2675                 url_link = True
2676         if self.params.get('writeurllink', False):
2677             url_link = True
2678         if self.params.get('writewebloclink', False):
2679             webloc_link = True
2680         if self.params.get('writedesktoplink', False):
2681             desktop_link = True
2682
2683         if url_link or webloc_link or desktop_link:
2684             if 'webpage_url' not in info_dict:
2685                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2686                 return
2687             ascii_url = iri_to_uri(info_dict['webpage_url'])
2688
2689         def _write_link_file(extension, template, newline, embed_filename):
2690             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2691             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2692                 self.to_screen('[info] Internet shortcut is already present')
2693             else:
2694                 try:
2695                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2696                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2697                         template_vars = {'url': ascii_url}
2698                         if embed_filename:
2699                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2700                         linkfile.write(template % template_vars)
2701                 except (OSError, IOError):
2702                     self.report_error('Cannot write internet shortcut ' + linkfn)
2703                     return False
2704             return True
2705
2706         if url_link:
2707             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2708                 return
2709         if webloc_link:
2710             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2711                 return
2712         if desktop_link:
2713             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2714                 return
2715
2716         try:
2717             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2718         except PostProcessingError as err:
2719             self.report_error('Preprocessing: %s' % str(err))
2720             return
2721
2722         must_record_download_archive = False
2723         if self.params.get('skip_download', False):
2724             info_dict['filepath'] = temp_filename
2725             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2726             info_dict['__files_to_move'] = files_to_move
2727             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2728         else:
2729             # Download
2730             info_dict.setdefault('__postprocessors', [])
2731             try:
2732
2733                 def existing_file(*filepaths):
2734                     ext = info_dict.get('ext')
2735                     final_ext = self.params.get('final_ext', ext)
2736                     existing_files = []
2737                     for file in orderedSet(filepaths):
2738                         if final_ext != ext:
2739                             converted = replace_extension(file, final_ext, ext)
2740                             if os.path.exists(encodeFilename(converted)):
2741                                 existing_files.append(converted)
2742                         if os.path.exists(encodeFilename(file)):
2743                             existing_files.append(file)
2744
2745                     if not existing_files or self.params.get('overwrites', False):
2746                         for file in orderedSet(existing_files):
2747                             self.report_file_delete(file)
2748                             os.remove(encodeFilename(file))
2749                         return None
2750
2751                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2752                     return existing_files[0]
2753
2754                 success = True
2755                 if info_dict.get('requested_formats') is not None:
2756
2757                     def compatible_formats(formats):
2758                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2759                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2760                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2761                         if len(video_formats) > 2 or len(audio_formats) > 2:
2762                             return False
2763
2764                         # Check extension
2765                         exts = set(format.get('ext') for format in formats)
2766                         COMPATIBLE_EXTS = (
2767                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2768                             set(('webm',)),
2769                         )
2770                         for ext_sets in COMPATIBLE_EXTS:
2771                             if ext_sets.issuperset(exts):
2772                                 return True
2773                         # TODO: Check acodec/vcodec
2774                         return False
2775
2776                     requested_formats = info_dict['requested_formats']
2777                     old_ext = info_dict['ext']
2778                     if self.params.get('merge_output_format') is None:
2779                         if not compatible_formats(requested_formats):
2780                             info_dict['ext'] = 'mkv'
2781                             self.report_warning(
2782                                 'Requested formats are incompatible for merge and will be merged into mkv')
2783                         if (info_dict['ext'] == 'webm'
2784                                 and info_dict.get('thumbnails')
2785                                 # check with type instead of pp_key, __name__, or isinstance
2786                                 # since we dont want any custom PPs to trigger this
2787                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2788                             info_dict['ext'] = 'mkv'
2789                             self.report_warning(
2790                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2791                     new_ext = info_dict['ext']
2792
2793                     def correct_ext(filename, ext=new_ext):
2794                         if filename == '-':
2795                             return filename
2796                         filename_real_ext = os.path.splitext(filename)[1][1:]
2797                         filename_wo_ext = (
2798                             os.path.splitext(filename)[0]
2799                             if filename_real_ext in (old_ext, new_ext)
2800                             else filename)
2801                         return '%s.%s' % (filename_wo_ext, ext)
2802
2803                     # Ensure filename always has a correct extension for successful merge
2804                     full_filename = correct_ext(full_filename)
2805                     temp_filename = correct_ext(temp_filename)
2806                     dl_filename = existing_file(full_filename, temp_filename)
2807                     info_dict['__real_download'] = False
2808
2809                     if dl_filename is not None:
2810                         self.report_file_already_downloaded(dl_filename)
2811                     elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2812                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2813                         success, real_download = self.dl(temp_filename, info_dict)
2814                         info_dict['__real_download'] = real_download
2815                     else:
2816                         downloaded = []
2817                         merger = FFmpegMergerPP(self)
2818                         if self.params.get('allow_unplayable_formats'):
2819                             self.report_warning(
2820                                 'You have requested merging of multiple formats '
2821                                 'while also allowing unplayable formats to be downloaded. '
2822                                 'The formats won\'t be merged to prevent data corruption.')
2823                         elif not merger.available:
2824                             self.report_warning(
2825                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2826                                 'The formats won\'t be merged.')
2827
2828                         if temp_filename == '-':
2829                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2830                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2831                                       else 'but ffmpeg is not installed')
2832                             self.report_warning(
2833                                 f'You have requested downloading multiple formats to stdout {reason}. '
2834                                 'The formats will be streamed one after the other')
2835                             fname = temp_filename
2836                         for f in requested_formats:
2837                             new_info = dict(info_dict)
2838                             del new_info['requested_formats']
2839                             new_info.update(f)
2840                             if temp_filename != '-':
2841                                 fname = prepend_extension(
2842                                     correct_ext(temp_filename, new_info['ext']),
2843                                     'f%s' % f['format_id'], new_info['ext'])
2844                                 if not self._ensure_dir_exists(fname):
2845                                     return
2846                                 f['filepath'] = fname
2847                                 downloaded.append(fname)
2848                             partial_success, real_download = self.dl(fname, new_info)
2849                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2850                             success = success and partial_success
2851                         if merger.available and not self.params.get('allow_unplayable_formats'):
2852                             info_dict['__postprocessors'].append(merger)
2853                             info_dict['__files_to_merge'] = downloaded
2854                             # Even if there were no downloads, it is being merged only now
2855                             info_dict['__real_download'] = True
2856                         else:
2857                             for file in downloaded:
2858                                 files_to_move[file] = None
2859                 else:
2860                     # Just a single file
2861                     dl_filename = existing_file(full_filename, temp_filename)
2862                     if dl_filename is None or dl_filename == temp_filename:
2863                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2864                         # So we should try to resume the download
2865                         success, real_download = self.dl(temp_filename, info_dict)
2866                         info_dict['__real_download'] = real_download
2867                     else:
2868                         self.report_file_already_downloaded(dl_filename)
2869
2870                 dl_filename = dl_filename or temp_filename
2871                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2872
2873             except network_exceptions as err:
2874                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2875                 return
2876             except (OSError, IOError) as err:
2877                 raise UnavailableVideoError(err)
2878             except (ContentTooShortError, ) as err:
2879                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2880                 return
2881
2882             if success and full_filename != '-':
2883
2884                 def fixup():
2885                     do_fixup = True
2886                     fixup_policy = self.params.get('fixup')
2887                     vid = info_dict['id']
2888
2889                     if fixup_policy in ('ignore', 'never'):
2890                         return
2891                     elif fixup_policy == 'warn':
2892                         do_fixup = False
2893                     elif fixup_policy != 'force':
2894                         assert fixup_policy in ('detect_or_warn', None)
2895                         if not info_dict.get('__real_download'):
2896                             do_fixup = False
2897
2898                     def ffmpeg_fixup(cndn, msg, cls):
2899                         if not cndn:
2900                             return
2901                         if not do_fixup:
2902                             self.report_warning(f'{vid}: {msg}')
2903                             return
2904                         pp = cls(self)
2905                         if pp.available:
2906                             info_dict['__postprocessors'].append(pp)
2907                         else:
2908                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2909
2910                     stretched_ratio = info_dict.get('stretched_ratio')
2911                     ffmpeg_fixup(
2912                         stretched_ratio not in (1, None),
2913                         f'Non-uniform pixel ratio {stretched_ratio}',
2914                         FFmpegFixupStretchedPP)
2915
2916                     ffmpeg_fixup(
2917                         (info_dict.get('requested_formats') is None
2918                          and info_dict.get('container') == 'm4a_dash'
2919                          and info_dict.get('ext') == 'm4a'),
2920                         'writing DASH m4a. Only some players support this container',
2921                         FFmpegFixupM4aPP)
2922
2923                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
2924                     downloader = downloader.__name__ if downloader else None
2925                     ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2926                                  'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2927                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2928                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2929
2930                 fixup()
2931                 try:
2932                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2933                 except PostProcessingError as err:
2934                     self.report_error('Postprocessing: %s' % str(err))
2935                     return
2936                 try:
2937                     for ph in self._post_hooks:
2938                         ph(info_dict['filepath'])
2939                 except Exception as err:
2940                     self.report_error('post hooks: %s' % str(err))
2941                     return
2942                 must_record_download_archive = True
2943
2944         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2945             self.record_download_archive(info_dict)
2946         max_downloads = self.params.get('max_downloads')
2947         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2948             raise MaxDownloadsReached()
2949
2950     def download(self, url_list):
2951         """Download a given list of URLs."""
2952         outtmpl = self.outtmpl_dict['default']
2953         if (len(url_list) > 1
2954                 and outtmpl != '-'
2955                 and '%' not in outtmpl
2956                 and self.params.get('max_downloads') != 1):
2957             raise SameFileError(outtmpl)
2958
2959         for url in url_list:
2960             try:
2961                 # It also downloads the videos
2962                 res = self.extract_info(
2963                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2964             except UnavailableVideoError:
2965                 self.report_error('unable to download video')
2966             except MaxDownloadsReached:
2967                 self.to_screen('[info] Maximum number of downloads reached')
2968                 raise
2969             except ExistingVideoReached:
2970                 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
2971                 raise
2972             except RejectedVideoReached:
2973                 self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
2974                 raise
2975             else:
2976                 if self.params.get('dump_single_json', False):
2977                     self.post_extract(res)
2978                     self.to_stdout(json.dumps(self.sanitize_info(res)))
2979
2980         return self._download_retcode
2981
2982     def download_with_info_file(self, info_filename):
2983         with contextlib.closing(fileinput.FileInput(
2984                 [info_filename], mode='r',
2985                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2986             # FileInput doesn't have a read method, we can't call json.load
2987             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2988         try:
2989             self.process_ie_result(info, download=True)
2990         except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2991             webpage_url = info.get('webpage_url')
2992             if webpage_url is not None:
2993                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2994                 return self.download([webpage_url])
2995             else:
2996                 raise
2997         return self._download_retcode
2998
2999     @staticmethod
3000     def sanitize_info(info_dict, remove_private_keys=False):
3001         ''' Sanitize the infodict for converting to json '''
3002         if info_dict is None:
3003             return info_dict
3004         info_dict.setdefault('epoch', int(time.time()))
3005         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3006         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
3007         if remove_private_keys:
3008             remove_keys |= {
3009                 'requested_formats', 'requested_subtitles', 'requested_entries',
3010                 'filepath', 'entries', 'original_url', 'playlist_autonumber',
3011             }
3012             empty_values = (None, {}, [], set(), tuple())
3013             reject = lambda k, v: k not in keep_keys and (
3014                 k.startswith('_') or k in remove_keys or v in empty_values)
3015         else:
3016             reject = lambda k, v: k in remove_keys
3017         filter_fn = lambda obj: (
3018             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
3019             else obj if not isinstance(obj, dict)
3020             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
3021         return filter_fn(info_dict)
3022
3023     @staticmethod
3024     def filter_requested_info(info_dict, actually_filter=True):
3025         ''' Alias of sanitize_info for backward compatibility '''
3026         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3027
3028     def run_pp(self, pp, infodict):
3029         files_to_delete = []
3030         if '__files_to_move' not in infodict:
3031             infodict['__files_to_move'] = {}
3032         try:
3033             files_to_delete, infodict = pp.run(infodict)
3034         except PostProcessingError as e:
3035             # Must be True and not 'only_download'
3036             if self.params.get('ignoreerrors') is True:
3037                 self.report_error(e)
3038                 return infodict
3039             raise
3040
3041         if not files_to_delete:
3042             return infodict
3043         if self.params.get('keepvideo', False):
3044             for f in files_to_delete:
3045                 infodict['__files_to_move'].setdefault(f, '')
3046         else:
3047             for old_filename in set(files_to_delete):
3048                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3049                 try:
3050                     os.remove(encodeFilename(old_filename))
3051                 except (IOError, OSError):
3052                     self.report_warning('Unable to remove downloaded original file')
3053                 if old_filename in infodict['__files_to_move']:
3054                     del infodict['__files_to_move'][old_filename]
3055         return infodict
3056
3057     @staticmethod
3058     def post_extract(info_dict):
3059         def actual_post_extract(info_dict):
3060             if info_dict.get('_type') in ('playlist', 'multi_video'):
3061                 for video_dict in info_dict.get('entries', {}):
3062                     actual_post_extract(video_dict or {})
3063                 return
3064
3065             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3066             extra = post_extractor().items()
3067             info_dict.update(extra)
3068             info_dict.pop('__post_extractor', None)
3069
3070             original_infodict = info_dict.get('__original_infodict') or {}
3071             original_infodict.update(extra)
3072             original_infodict.pop('__post_extractor', None)
3073
3074         actual_post_extract(info_dict or {})
3075
3076     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3077         info = dict(ie_info)
3078         info['__files_to_move'] = files_to_move or {}
3079         for pp in self._pps[key]:
3080             info = self.run_pp(pp, info)
3081         return info, info.pop('__files_to_move', None)
3082
3083     def post_process(self, filename, ie_info, files_to_move=None):
3084         """Run all the postprocessors on the given file."""
3085         info = dict(ie_info)
3086         info['filepath'] = filename
3087         info['__files_to_move'] = files_to_move or {}
3088
3089         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3090             info = self.run_pp(pp, info)
3091         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3092         del info['__files_to_move']
3093         for pp in self._pps['after_move']:
3094             info = self.run_pp(pp, info)
3095         return info
3096
3097     def _make_archive_id(self, info_dict):
3098         video_id = info_dict.get('id')
3099         if not video_id:
3100             return
3101         # Future-proof against any change in case
3102         # and backwards compatibility with prior versions
3103         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3104         if extractor is None:
3105             url = str_or_none(info_dict.get('url'))
3106             if not url:
3107                 return
3108             # Try to find matching extractor for the URL and take its ie_key
3109             for ie_key, ie in self._ies.items():
3110                 if ie.suitable(url):
3111                     extractor = ie_key
3112                     break
3113             else:
3114                 return
3115         return '%s %s' % (extractor.lower(), video_id)
3116
3117     def in_download_archive(self, info_dict):
3118         fn = self.params.get('download_archive')
3119         if fn is None:
3120             return False
3121
3122         vid_id = self._make_archive_id(info_dict)
3123         if not vid_id:
3124             return False  # Incomplete video information
3125
3126         return vid_id in self.archive
3127
3128     def record_download_archive(self, info_dict):
3129         fn = self.params.get('download_archive')
3130         if fn is None:
3131             return
3132         vid_id = self._make_archive_id(info_dict)
3133         assert vid_id
3134         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3135             archive_file.write(vid_id + '\n')
3136         self.archive.add(vid_id)
3137
3138     @staticmethod
3139     def format_resolution(format, default='unknown'):
3140         is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
3141         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3142             return 'audio only'
3143         if format.get('resolution') is not None:
3144             return format['resolution']
3145         if format.get('width') and format.get('height'):
3146             res = '%dx%d' % (format['width'], format['height'])
3147         elif format.get('height'):
3148             res = '%sp' % format['height']
3149         elif format.get('width'):
3150             res = '%dx?' % format['width']
3151         elif is_images:
3152             return 'images'
3153         else:
3154             return default
3155         return f'{res} images' if is_images else res
3156
3157     def _format_note(self, fdict):
3158         res = ''
3159         if fdict.get('ext') in ['f4f', 'f4m']:
3160             res += '(unsupported) '
3161         if fdict.get('language'):
3162             if res:
3163                 res += ' '
3164             res += '[%s] ' % fdict['language']
3165         if fdict.get('format_note') is not None:
3166             res += fdict['format_note'] + ' '
3167         if fdict.get('tbr') is not None:
3168             res += '%4dk ' % fdict['tbr']
3169         if fdict.get('container') is not None:
3170             if res:
3171                 res += ', '
3172             res += '%s container' % fdict['container']
3173         if (fdict.get('vcodec') is not None
3174                 and fdict.get('vcodec') != 'none'):
3175             if res:
3176                 res += ', '
3177             res += fdict['vcodec']
3178             if fdict.get('vbr') is not None:
3179                 res += '@'
3180         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3181             res += 'video@'
3182         if fdict.get('vbr') is not None:
3183             res += '%4dk' % fdict['vbr']
3184         if fdict.get('fps') is not None:
3185             if res:
3186                 res += ', '
3187             res += '%sfps' % fdict['fps']
3188         if fdict.get('acodec') is not None:
3189             if res:
3190                 res += ', '
3191             if fdict['acodec'] == 'none':
3192                 res += 'video only'
3193             else:
3194                 res += '%-5s' % fdict['acodec']
3195         elif fdict.get('abr') is not None:
3196             if res:
3197                 res += ', '
3198             res += 'audio'
3199         if fdict.get('abr') is not None:
3200             res += '@%3dk' % fdict['abr']
3201         if fdict.get('asr') is not None:
3202             res += ' (%5dHz)' % fdict['asr']
3203         if fdict.get('filesize') is not None:
3204             if res:
3205                 res += ', '
3206             res += format_bytes(fdict['filesize'])
3207         elif fdict.get('filesize_approx') is not None:
3208             if res:
3209                 res += ', '
3210             res += '~' + format_bytes(fdict['filesize_approx'])
3211         return res
3212
3213     def _list_format_headers(self, *headers):
3214         if self.params.get('listformats_table', True) is not False:
3215             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3216         return headers
3217
3218     def list_formats(self, info_dict):
3219         formats = info_dict.get('formats', [info_dict])
3220         new_format = self.params.get('listformats_table', True) is not False
3221         if new_format:
3222             tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats))
3223             vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats))
3224             abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats))
3225             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3226             table = [
3227                 [
3228                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3229                     format_field(f, 'ext'),
3230                     self.format_resolution(f),
3231                     format_field(f, 'fps', '%d'),
3232                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3233                     delim,
3234                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3235                     format_field(f, 'tbr', f'%{tbr_digits}dk'),
3236                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3237                     delim,
3238                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3239                     format_field(f, 'vbr', f'%{vbr_digits}dk'),
3240                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3241                     format_field(f, 'abr', f'%{abr_digits}dk'),
3242                     format_field(f, 'asr', '%5dHz'),
3243                     ', '.join(filter(None, (
3244                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else '',
3245                         format_field(f, 'language', '[%s]'),
3246                         format_field(f, 'format_note'),
3247                         format_field(f, 'container', ignore=(None, f.get('ext'))),
3248                     ))),
3249                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3250             header_line = self._list_format_headers(
3251                 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', '  TBR', 'PROTO',
3252                 delim, 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO')
3253         else:
3254             table = [
3255                 [
3256                     format_field(f, 'format_id'),
3257                     format_field(f, 'ext'),
3258                     self.format_resolution(f),
3259                     self._format_note(f)]
3260                 for f in formats
3261                 if f.get('preference') is None or f['preference'] >= -1000]
3262             header_line = ['format code', 'extension', 'resolution', 'note']
3263
3264         self.to_screen(
3265             '[info] Available formats for %s:' % info_dict['id'])
3266         self.to_stdout(render_table(
3267             header_line, table,
3268             extraGap=(0 if new_format else 1),
3269             hideEmpty=new_format,
3270             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3271
3272     def list_thumbnails(self, info_dict):
3273         thumbnails = list(info_dict.get('thumbnails'))
3274         if not thumbnails:
3275             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3276             return
3277
3278         self.to_screen(
3279             '[info] Thumbnails for %s:' % info_dict['id'])
3280         self.to_stdout(render_table(
3281             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3282             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3283
3284     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3285         if not subtitles:
3286             self.to_screen('%s has no %s' % (video_id, name))
3287             return
3288         self.to_screen(
3289             'Available %s for %s:' % (name, video_id))
3290
3291         def _row(lang, formats):
3292             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3293             if len(set(names)) == 1:
3294                 names = [] if names[0] == 'unknown' else names[:1]
3295             return [lang, ', '.join(names), ', '.join(exts)]
3296
3297         self.to_stdout(render_table(
3298             self._list_format_headers('Language', 'Name', 'Formats'),
3299             [_row(lang, formats) for lang, formats in subtitles.items()],
3300             hideEmpty=True))
3301
3302     def urlopen(self, req):
3303         """ Start an HTTP download """
3304         if isinstance(req, compat_basestring):
3305             req = sanitized_Request(req)
3306         return self._opener.open(req, timeout=self._socket_timeout)
3307
    def print_debug_header(self):
        """Emit the '[debug]' header lines describing the runtime environment.

        Does nothing unless the 'verbose' param is set. Lines are written in
        this order: encodings, yt-dlp version, lazy-loader status, plugins,
        compatibility options, git HEAD (when available), Python version,
        external executable versions, optional libraries and the proxy map.
        Output goes to the 'logger' param's debug() when a logger is
        configured, otherwise to this instance's own writer.
        """
        if not self.params.get('verbose'):
            return

        def get_encoding(stream):
            # Report the stream's `encoding` attribute (or that it is missing),
            # flagging streams for which supports_terminal_sequences() is false
            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
            if not supports_terminal_sequences(stream):
                ret += ' (No ANSI)'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            get_encoding(self._screen_file), get_encoding(self._err_file),
            self.get_encoding())

        # Pick the sink for every following line
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            # The encodings line alone goes through write_string with
            # encoding=None; later lines use self._write_string.
            # NOTE(review): presumably so this line is still printed when the
            # configured encoding is unusable - confirm
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            # Show "Class as name" when a plugin is registered under another name
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params.get('compat_opts'):
            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
        # Best effort: ask git for the short HEAD hash of the checkout that
        # contains this file; any failure is deliberately ignored
        try:
            sp = Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate_or_kill()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                write_debug('Git HEAD: %s' % out)
        except Exception:
            # NOTE(review): sys.exc_clear() only exists on Python 2, so on
            # Python 3 this raises AttributeError, which is swallowed below -
            # looks like a leftover
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Implementation name, plus the PyPy version when running on PyPy
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        write_debug('Python version %s (%s %s) - %s' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        # External programs; entries with a falsy version are left out
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        # Imported here rather than at module level
        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

        # Each `flag and 'name'` yields the name only when the flag is truthy;
        # filter(None, ...) drops the rest
        lib_str = ', '.join(sorted(filter(None, (
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            has_websockets and 'websockets',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            KEYRING_AVAILABLE and 'keyring',
        )))) or 'none'
        write_debug('Optional libraries: %s' % lib_str)

        # Merge the proxies of every opener handler that exposes them
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented
        # (the `False and` guard makes this branch unreachable)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3410
3411     def _setup_opener(self):
3412         timeout_val = self.params.get('socket_timeout')
3413         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3414
3415         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3416         opts_cookiefile = self.params.get('cookiefile')
3417         opts_proxy = self.params.get('proxy')
3418
3419         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3420
3421         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3422         if opts_proxy is not None:
3423             if opts_proxy == '':
3424                 proxies = {}
3425             else:
3426                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3427         else:
3428             proxies = compat_urllib_request.getproxies()
3429             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3430             if 'http' in proxies and 'https' not in proxies:
3431                 proxies['https'] = proxies['http']
3432         proxy_handler = PerRequestProxyHandler(proxies)
3433
3434         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3435         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3436         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3437         redirect_handler = YoutubeDLRedirectHandler()
3438         data_handler = compat_urllib_request_DataHandler()
3439
3440         # When passing our own FileHandler instance, build_opener won't add the
3441         # default FileHandler and allows us to disable the file protocol, which
3442         # can be used for malicious purposes (see
3443         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3444         file_handler = compat_urllib_request.FileHandler()
3445
3446         def file_open(*args, **kwargs):
3447             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3448         file_handler.file_open = file_open
3449
3450         opener = compat_urllib_request.build_opener(
3451             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3452
3453         # Delete the default user-agent header, which would otherwise apply in
3454         # cases where our custom HTTP handler doesn't come into play
3455         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3456         opener.addheaders = []
3457         self._opener = opener
3458
3459     def encode(self, s):
3460         if isinstance(s, bytes):
3461             return s  # Already encoded
3462
3463         try:
3464             return s.encode(self.get_encoding())
3465         except UnicodeEncodeError as err:
3466             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3467             raise
3468
3469     def get_encoding(self):
3470         encoding = self.params.get('encoding')
3471         if encoding is None:
3472             encoding = preferredencoding()
3473         return encoding
3474
3475     def _write_info_json(self, label, ie_result, infofn):
3476         ''' Write infojson and returns True = written, False = skip, None = error '''
3477         if not self.params.get('writeinfojson'):
3478             return False
3479         elif not infofn:
3480             self.write_debug(f'Skipping writing {label} infojson')
3481             return False
3482         elif not self._ensure_dir_exists(infofn):
3483             return None
3484         elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3485             self.to_screen(f'[info] {label.title()} metadata is already present')
3486         else:
3487             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3488             try:
3489                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3490             except (OSError, IOError):
3491                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3492                 return None
3493         return True
3494
3495     def _write_description(self, label, ie_result, descfn):
3496         ''' Write description and returns True = written, False = skip, None = error '''
3497         if not self.params.get('writedescription'):
3498             return False
3499         elif not descfn:
3500             self.write_debug(f'Skipping writing {label} description')
3501             return False
3502         elif not self._ensure_dir_exists(descfn):
3503             return None
3504         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3505             self.to_screen(f'[info] {label.title()} description is already present')
3506         elif ie_result.get('description') is None:
3507             self.report_warning(f'There\'s no {label} description to write')
3508             return False
3509         else:
3510             try:
3511                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3512                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3513                     descfile.write(ie_result['description'])
3514             except (OSError, IOError):
3515                 self.report_error(f'Cannot write {label} description file {descfn}')
3516                 return None
3517         return True
3518
3519     def _write_subtitles(self, info_dict, filename):
3520         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3521         ret = []
3522         subtitles = info_dict.get('requested_subtitles')
3523         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3524             # subtitles download errors are already managed as troubles in relevant IE
3525             # that way it will silently go on when used with unsupporting IE
3526             return ret
3527
3528         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3529         if not sub_filename_base:
3530             self.to_screen('[info] Skipping writing video subtitles')
3531             return ret
3532         for sub_lang, sub_info in subtitles.items():
3533             sub_format = sub_info['ext']
3534             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3535             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3536             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3537                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3538                 sub_info['filepath'] = sub_filename
3539                 ret.append((sub_filename, sub_filename_final))
3540                 continue
3541
3542             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3543             if sub_info.get('data') is not None:
3544                 try:
3545                     # Use newline='' to prevent conversion of newline characters
3546                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3547                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3548                         subfile.write(sub_info['data'])
3549                     sub_info['filepath'] = sub_filename
3550                     ret.append((sub_filename, sub_filename_final))
3551                     continue
3552                 except (OSError, IOError):
3553                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3554                     return None
3555
3556             try:
3557                 sub_copy = sub_info.copy()
3558                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3559                 self.dl(sub_filename, sub_copy, subtitle=True)
3560                 sub_info['filepath'] = sub_filename
3561                 ret.append((sub_filename, sub_filename_final))
3562             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3563                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3564                 continue
3565         return ret
3566
3567     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3568         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3569         write_all = self.params.get('write_all_thumbnails', False)
3570         thumbnails, ret = [], []
3571         if write_all or self.params.get('writethumbnail', False):
3572             thumbnails = info_dict.get('thumbnails') or []
3573         multiple = write_all and len(thumbnails) > 1
3574
3575         if thumb_filename_base is None:
3576             thumb_filename_base = filename
3577         if thumbnails and not thumb_filename_base:
3578             self.write_debug(f'Skipping writing {label} thumbnail')
3579             return ret
3580
3581         for t in thumbnails[::-1]:
3582             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3583             thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3584             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3585             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3586
3587             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3588                 ret.append((thumb_filename, thumb_filename_final))
3589                 t['filepath'] = thumb_filename
3590                 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
3591             else:
3592                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3593                 try:
3594                     uf = self.urlopen(t['url'])
3595                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3596                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3597                         shutil.copyfileobj(uf, thumbf)
3598                     ret.append((thumb_filename, thumb_filename_final))
3599                     t['filepath'] = thumb_filename
3600                 except network_exceptions as err:
3601                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3602             if ret and not write_all:
3603                 break
3604         return ret