yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import functools
  13 import io
  14 import itertools
  15 import json
  16 import locale
  17 import operator
  18 import os
  19 import platform
  20 import re
  21 import shutil
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26 import tokenize
  27 import traceback
  28 import random
  29 import unicodedata
  30
  31 from enum import Enum
  32 from string import ascii_letters
  33
  34 from .compat import (
  35     compat_basestring,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     HEADRequest,
  74     int_or_none,
  75     iri_to_uri,
  76     ISO3166Utils,
  77     join_nonempty,
  78     LazyList,
  79     LINK_TEMPLATES,
  80     locked_file,
  81     make_dir,
  82     make_HTTPS_handler,
  83     MaxDownloadsReached,
  84     network_exceptions,
  85     number_of_digits,
  86     orderedSet,
  87     OUTTMPL_TYPES,
  88     PagedList,
  89     parse_filesize,
  90     PerRequestProxyHandler,
  91     platform_name,
  92     Popen,
  93     PostProcessingError,
  94     preferredencoding,
  95     prepend_extension,
  96     register_socks_protocols,
  97     RejectedVideoReached,
  98     render_table,
  99     replace_extension,
 100     SameFileError,
 101     sanitize_filename,
 102     sanitize_path,
 103     sanitize_url,
 104     sanitized_Request,
 105     std_headers,
 106     STR_FORMAT_RE_TMPL,
 107     STR_FORMAT_TYPES,
 108     str_or_none,
 109     strftime_or_none,
 110     subtitles_filename,
 111     supports_terminal_sequences,
 112     ThrottledDownload,
 113     to_high_limit_path,
 114     traverse_obj,
 115     try_get,
 116     UnavailableVideoError,
 117     url_basename,
 118     variadic,
 119     version_tuple,
 120     write_json_file,
 121     write_string,
 122     YoutubeDLCookieProcessor,
 123     YoutubeDLHandler,
 124     YoutubeDLRedirectHandler,
 125 )
 126 from .cache import Cache
 127 from .minicurses import format_text
 128 from .extractor import (
 129     gen_extractor_classes,
 130     get_info_extractor,
 131     _LAZY_LOADER,
 132     _PLUGIN_CLASSES as plugin_extractors
 133 )
 134 from .extractor.openload import PhantomJSwrapper
 135 from .downloader import (
 136     FFmpegFD,
 137     get_suitable_downloader,
 138     shorten_protocol_name
 139 )
 140 from .downloader.rtmp import rtmpdump_version
 141 from .postprocessor import (
 142     get_postprocessor,
 143     EmbedThumbnailPP,
 144     FFmpegFixupDurationPP,
 145     FFmpegFixupM3u8PP,
 146     FFmpegFixupM4aPP,
 147     FFmpegFixupStretchedPP,
 148     FFmpegFixupTimestampPP,
 149     FFmpegMergerPP,
 150     FFmpegPostProcessor,
 151     MoveFilesAfterDownloadPP,
 152     _PLUGIN_CLASSES as plugin_postprocessors
 153 )
 154 from .update import detect_variant
 155 from .version import __version__
 156
 157 if compat_os_name == 'nt':
 158     import ctypes
 159
 160
 161 class YoutubeDL(object):
 162     """YoutubeDL class.
 163
 164     YoutubeDL objects are the ones responsible for downloading the
 165     actual video file and writing it to disk if the user has requested
 166     it, among some other tasks. In most cases there should be one per
 167     program. As, given a video URL, the downloader doesn't know how to
 168     extract all the needed information, task that InfoExtractors do, it
 169     has to pass the URL to one of them.
 170
 171     For this, YoutubeDL objects have a method that allows
 172     InfoExtractors to be registered in a given order. When it is passed
 173     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 174     finds that reports being able to handle it. The InfoExtractor extracts
 175     all the information about the video or videos the URL refers to, and
 176     YoutubeDL processes the extracted information, possibly using a File
 177     Downloader to download the video.
 178
 179     YoutubeDL objects accept a lot of parameters. In order not to saturate
 180     the object constructor with arguments, it receives a dictionary of
 181     options instead. These options are available through the params
 182     attribute for the InfoExtractors to use. The YoutubeDL also
 183     registers itself as the downloader in charge for the InfoExtractors
 184     that are added to it, so this is a "mutual registration".
 185
 186     Available options:
 187
 188     username:          Username for authentication purposes.
 189     password:          Password for authentication purposes.
 190     videopassword:     Password for accessing a video.
 191     ap_mso:            Adobe Pass multiple-system operator identifier.
 192     ap_username:       Multiple-system operator account username.
 193     ap_password:       Multiple-system operator account password.
 194     usenetrc:          Use netrc for authentication instead.
 195     verbose:           Print additional info to stdout.
 196     quiet:             Do not print messages to stdout.
 197     no_warnings:       Do not print out anything for warnings.
 198     forceprint:        A list of templates to force print
 199     forceurl:          Force printing final URL. (Deprecated)
 200     forcetitle:        Force printing title. (Deprecated)
 201     forceid:           Force printing ID. (Deprecated)
 202     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 203     forcedescription:  Force printing description. (Deprecated)
 204     forcefilename:     Force printing final filename. (Deprecated)
 205     forceduration:     Force printing duration. (Deprecated)
 206     forcejson:         Force printing info_dict as JSON.
 207     dump_single_json:  Force printing the info_dict of the whole playlist
 208                        (or video) as a single JSON line.
 209     force_write_download_archive: Force writing download archive regardless
 210                        of 'skip_download' or 'simulate'.
 211     simulate:          Do not download the video files. If unset (or None),
 212                        simulate only if listsubtitles, listformats or list_thumbnails is used
 213     format:            Video format code. see "FORMAT SELECTION" for more details.
 214     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 215     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 216                        extracting metadata even if the video is not actually
 217                        available for download (experimental)
 218     format_sort:       A list of fields by which to sort the video formats.
 219                        See "Sorting Formats" for more details.
 220     format_sort_force: Force the given format_sort. see "Sorting Formats"
 221                        for more details.
 222     allow_multiple_video_streams:   Allow multiple video streams to be merged
 223                        into a single file
 224     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 225                        into a single file
 226     check_formats:     Whether to test if the formats are downloadable.
 227                        Can be True (check all), False (check none),
 228                        'selected' (check selected formats),
 229                        or None (check only if requested by extractor)
 230     paths:             Dictionary of output paths. The allowed keys are 'home'
 231                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 232     outtmpl:           Dictionary of templates for output names. Allowed keys
 233                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 234                        For compatibility with youtube-dl, a single string can also be used
 235     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 236     restrictfilenames: Do not allow "&" and spaces in file names
 237     trim_file_name:    Limit length of filename (extension excluded)
 238     windowsfilenames:  Force the filenames to be windows compatible
 239     ignoreerrors:      Do not stop on download/postprocessing errors.
 240                        Can be 'only_download' to ignore only download errors.
 241                        Default is 'only_download' for CLI, but False for API
 242     skip_playlist_after_errors: Number of allowed failures until the rest of
 243                        the playlist is skipped
 244     force_generic_extractor: Force downloader to use the generic extractor
 245     overwrites:        Overwrite all video and metadata files if True,
 246                        overwrite only non-video files if None
 247                        and don't overwrite any file if False
 248                        For compatibility with youtube-dl,
 249                        "nooverwrites" may also be used instead
 250     playliststart:     Playlist item to start at.
 251     playlistend:       Playlist item to end at.
 252     playlist_items:    Specific indices of playlist to download.
 253     playlistreverse:   Download playlist items in reverse order.
 254     playlistrandom:    Download playlist items in random order.
 255     matchtitle:        Download only matching titles.
 256     rejecttitle:       Reject downloads for matching titles.
 257     logger:            Log messages to a logging.Logger instance.
 258     logtostderr:       Log messages to stderr instead of stdout.
 259     consoletitle:       Display progress in console window's titlebar.
 260     writedescription:  Write the video description to a .description file
 261     writeinfojson:     Write the video metadata to a .info.json file
 262     clean_infojson:    Remove private fields from the infojson
 263     getcomments:       Extract video comments. This will not be written to disk
 264                        unless writeinfojson is also given
 265     writeannotations:  Write the video annotations to a .annotations.xml file
 266     writethumbnail:    Write the thumbnail image to a file
 267     allow_playlist_files: Whether to write playlists' description, infojson etc
 268                        also to disk when using the 'write*' options
 269     write_all_thumbnails:  Write all thumbnail formats to files
 270     writelink:         Write an internet shortcut file, depending on the
 271                        current platform (.url/.webloc/.desktop)
 272     writeurllink:      Write a Windows internet shortcut file (.url)
 273     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 274     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 275     writesubtitles:    Write the video subtitles to a file
 276     writeautomaticsub: Write the automatically generated subtitles to a file
 277     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 278                        Downloads all the subtitles of the video
 279                        (requires writesubtitles or writeautomaticsub)
 280     listsubtitles:     Lists all available subtitles for the video
 281     subtitlesformat:   The format code for subtitles
 282     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 283                        The list may contain "all" to refer to all the available
 284                        subtitles. The language can be prefixed with a "-" to
 285                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 286     keepvideo:         Keep the video file after post-processing
 287     daterange:         A DateRange object, download only if the upload_date is in the range.
 288     skip_download:     Skip the actual download of the video file
 289     cachedir:          Location of the cache files in the filesystem.
 290                        False to disable filesystem cache.
 291     noplaylist:        Download single video instead of a playlist if in doubt.
 292     age_limit:         An integer representing the user's age in years.
 293                        Unsuitable videos for the given age are skipped.
 294     min_views:         An integer representing the minimum view count the video
 295                        must have in order to not be skipped.
 296                        Videos without view count information are always
 297                        downloaded. None for no limit.
 298     max_views:         An integer representing the maximum view count.
 299                        Videos that are more popular than that are not
 300                        downloaded.
 301                        Videos without view count information are always
 302                        downloaded. None for no limit.
 303     download_archive:  File name of a file where all downloads are recorded.
 304                        Videos already present in the file are not downloaded
 305                        again.
 306     break_on_existing: Stop the download process after attempting to download a
 307                        file that is in the archive.
 308     break_on_reject:   Stop the download process when encountering a video that
 309                        has been filtered out.
 310     cookiefile:        File name where cookies should be read from and dumped to
 311     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 312                        name/path from where cookies are loaded.
 313                        Eg: ('chrome', ) or ('vivaldi', 'default')
 314     nocheckcertificate:Do not verify SSL certificates
 315     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 316                        At the moment, this is only supported by YouTube.
 317     proxy:             URL of the proxy server to use
 318     geo_verification_proxy:  URL of the proxy to use for IP address verification
 319                        on geo-restricted sites.
 320     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 321     bidi_workaround:   Work around buggy terminals without bidirectional text
 322                        support, using fribidi
 323     debug_printtraffic:Print out sent and received HTTP traffic
 324     include_ads:       Download ads as well
 325     default_search:    Prepend this string if an input url is not valid.
 326                        'auto' for elaborate guessing
 327     encoding:          Use this encoding instead of the system-specified.
 328     extract_flat:      Do not resolve URLs, return the immediate result.
 329                        Pass in 'in_playlist' to only show this behavior for
 330                        playlist items.
 331     postprocessors:    A list of dictionaries, each with an entry
 332                        * key:  The name of the postprocessor. See
 333                                yt_dlp/postprocessor/__init__.py for a list.
 334                        * when: When to run the postprocessor. Can be one of
 335                                pre_process|before_dl|post_process|after_move.
 336                                Assumed to be 'post_process' if not given
 337     post_hooks:        Deprecated - Register a custom postprocessor instead
 338                        A list of functions that get called as the final step
 339                        for each video file, after all postprocessors have been
 340                        called. The filename will be passed as the only argument.
 341     progress_hooks:    A list of functions that get called on download
 342                        progress, with a dictionary with the entries
 343                        * status: One of "downloading", "error", or "finished".
 344                                  Check this first and ignore unknown values.
 345                        * info_dict: The extracted info_dict
 346
 347                        If status is one of "downloading", or "finished", the
 348                        following properties may also be present:
 349                        * filename: The final filename (always present)
 350                        * tmpfilename: The filename we're currently writing to
 351                        * downloaded_bytes: Bytes on disk
 352                        * total_bytes: Size of the whole file, None if unknown
 353                        * total_bytes_estimate: Guess of the eventual file size,
 354                                                None if unavailable.
 355                        * elapsed: The number of seconds since download started.
 356                        * eta: The estimated time in seconds, None if unknown
 357                        * speed: The download speed in bytes/second, None if
 358                                 unknown
 359                        * fragment_index: The counter of the currently
 360                                          downloaded video fragment.
 361                        * fragment_count: The number of fragments (= individual
 362                                          files that will be merged)
 363
 364                        Progress hooks are guaranteed to be called at least once
 365                        (with status "finished") if the download is successful.
 366     postprocessor_hooks:  A list of functions that get called on postprocessing
 367                        progress, with a dictionary with the entries
 368                        * status: One of "started", "processing", or "finished".
 369                                  Check this first and ignore unknown values.
 370                        * postprocessor: Name of the postprocessor
 371                        * info_dict: The extracted info_dict
 372
 373                        Postprocessor hooks are guaranteed to be called at least twice
 374                        (with status "started" and "finished") if the processing is successful.
 375     merge_output_format: Extension to use when merging formats.
 376     final_ext:         Expected final extension; used to detect when the file was
 377                        already downloaded and converted. "merge_output_format" is
 378                        replaced by this extension when given
 379     fixup:             Automatically correct known faults of the file.
 380                        One of:
 381                        - "never": do nothing
 382                        - "warn": only emit a warning
 383                        - "detect_or_warn": check whether we can do anything
 384                                            about it, warn otherwise (default)
 385     source_address:    Client-side IP address to bind to.
 386     call_home:         Boolean, true iff we are allowed to contact the
 387                        yt-dlp servers for debugging. (BROKEN)
 388     sleep_interval_requests: Number of seconds to sleep between requests
 389                        during extraction
 390     sleep_interval:    Number of seconds to sleep before each download when
 391                        used alone or a lower bound of a range for randomized
 392                        sleep before each download (minimum possible number
 393                        of seconds to sleep) when used along with
 394                        max_sleep_interval.
 395     max_sleep_interval:Upper bound of a range for randomized sleep before each
 396                        download (maximum possible number of seconds to sleep).
 397                        Must only be used along with sleep_interval.
 398                        Actual sleep time will be a random float from range
 399                        [sleep_interval; max_sleep_interval].
 400     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 401     listformats:       Print an overview of available video formats and exit.
 402     list_thumbnails:   Print a table of all thumbnails and exit.
 403     match_filter:      A function that gets called with the info_dict of
 404                        every video.
 405                        If it returns a message, the video is ignored.
 406                        If it returns None, the video is downloaded.
 407                        match_filter_func in utils.py is one example for this.
 408     no_color:          Do not emit color codes in output.
 409     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 410                        HTTP header
 411     geo_bypass_country:
 412                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 413                        explicit geographic restriction bypassing via faking
 414                        X-Forwarded-For HTTP header
 415     geo_bypass_ip_block:
 416                        IP range in CIDR notation that will be used similarly to
 417                        geo_bypass_country
 418
 419     The following options determine which downloader is picked:
 420     external_downloader: A dictionary of protocol keys and the executable of the
 421                        external downloader to use for it. The allowed protocols
 422                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 423                        Set the value to 'native' to use the native downloader
 424     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 425                        or {'m3u8': 'ffmpeg'} instead.
 426                        Use the native HLS downloader instead of ffmpeg/avconv
 427                        if True, otherwise use ffmpeg/avconv if False, otherwise
 428                        use downloader suggested by extractor if None.
 429     compat_opts:       Compatibility options. See "Differences in default behavior".
 430                        The following options do not work when used through the API:
 431                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 432                        no-clean-infojson, no-playlist-metafiles, no-keep-subs.
 433                        Refer to __init__.py for their implementation
 434     progress_template: Dictionary of templates for progress outputs.
 435                        Allowed keys are 'download', 'postprocess',
 436                        'download-title' (console title) and 'postprocess-title'.
 437                        The template is mapped on a dictionary with keys 'progress' and 'info'
 438
 439     The following parameters are not used by YoutubeDL itself, they are used by
 440     the downloader (see yt_dlp/downloader/common.py):
 441     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 442     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 443     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 444     external_downloader_args.
 445
 446     The following options are used by the post processors:
 447     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 448                        otherwise prefer ffmpeg. (avconv support is deprecated)
 449     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 450                        to the binary or its containing directory.
 451     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 452                        and a list of additional command-line arguments for the
 453                        postprocessor/executable. The dict can also have "PP+EXE" keys
 454                        which are used when the given exe is used by the given PP.
 455                        Use 'default' as the name for arguments to be passed to all PP
 456                        For compatibility with youtube-dl, a single list of args
 457                        can also be used
 458
 459     The following options are used by the extractors:
 460     extractor_retries: Number of times to retry for known errors
 461     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 462     hls_split_discontinuity: Split HLS playlists to different formats at
 463                        discontinuities such as ad breaks (default: False)
 464     extractor_args:    A dictionary of arguments to be passed to the extractors.
 465                        See "EXTRACTOR ARGUMENTS" for details.
 466                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 467     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 468                        If True (default), DASH manifests and related
 469                        data will be downloaded and processed by extractor.
 470                        You can reduce network I/O by disabling it if you don't
 471                        care about DASH. (only for youtube)
 472     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 473                        If True (default), HLS manifests and related
 474                        data will be downloaded and processed by extractor.
 475                        You can reduce network I/O by disabling it if you don't
 476                        care about HLS. (only for youtube)
 477     """
 478
 479     _NUMERIC_FIELDS = set((
 480         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 481         'timestamp', 'release_timestamp',
 482         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 483         'average_rating', 'comment_count', 'age_limit',
 484         'start_time', 'end_time',
 485         'chapter_number', 'season_number', 'episode_number',
 486         'track_number', 'disc_number', 'release_year',
 487     ))
 488
 489     _format_selection_exts = {
 490         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 491         'video': {'mp4', 'flv', 'webm', '3gp'},
 492         'storyboards': {'mhtml'},
 493     }
 494
 495     params = None
 496     _ies = {}
 497     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 498     _printed_messages = set()
 499     _first_webpage_request = True
 500     _download_retcode = None
 501     _num_downloads = None
 502     _playlist_level = 0
 503     _playlist_urls = set()
 504     _screen_file = None
 505
 506     def __init__(self, params=None, auto_init=True):
 507         """Create a YoutubeDL object with the given options.
 508         @param auto_init    Whether to load the default extractors and print header (if verbose).
 509                             Set to 'no_verbose_header' to not print the header
 510         """
 511         if params is None:
 512             params = {}
 513         self._ies = {}
 514         self._ies_instances = {}
 515         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 516         self._printed_messages = set()
 517         self._first_webpage_request = True
 518         self._post_hooks = []
 519         self._progress_hooks = []
 520         self._postprocessor_hooks = []
 521         self._download_retcode = 0
 522         self._num_downloads = 0
 523         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 524         self._err_file = sys.stderr
 525         self.params = params
 526         self.cache = Cache(self)
 527
 528         windows_enable_vt_mode()
 529         # FIXME: This will break if we ever print color to stdout
 530         self._allow_colors = {
 531             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 532             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 533         }
 534
 535         if sys.version_info < (3, 6):
 536             self.report_warning(
 537                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 538
 539         if self.params.get('allow_unplayable_formats'):
 540             self.report_warning(
 541                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 542                 'This is a developer option intended for debugging. \n'
 543                 '         If you experience any issues while using this option, '
 544                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 545
 546         def check_deprecated(param, option, suggestion):
 547             if self.params.get(param) is not None:
 548                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 549                 return True
 550             return False
 551
 552         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 553             if self.params.get('geo_verification_proxy') is None:
 554                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 555
 556         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 557         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 558         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 559
 560         for msg in self.params.get('_warnings', []):
 561             self.report_warning(msg)
 562
 563         if 'list-formats' in self.params.get('compat_opts', []):
 564             self.params['listformats_table'] = False
 565
 566         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 567             # nooverwrites was unnecessarily changed to overwrites
 568             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 569             # This ensures compatibility with both keys
 570             self.params['overwrites'] = not self.params['nooverwrites']
 571         elif self.params.get('overwrites') is None:
 572             self.params.pop('overwrites', None)
 573         else:
 574             self.params['nooverwrites'] = not self.params['overwrites']
 575
 576         if params.get('bidi_workaround', False):
 577             try:
 578                 import pty
 579                 master, slave = pty.openpty()
 580                 width = compat_get_terminal_size().columns
 581                 if width is None:
 582                     width_args = []
 583                 else:
 584                     width_args = ['-w', str(width)]
 585                 sp_kwargs = dict(
 586                     stdin=subprocess.PIPE,
 587                     stdout=slave,
 588                     stderr=self._err_file)
 589                 try:
 590                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 591                 except OSError:
 592                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 593                 self._output_channel = os.fdopen(master, 'rb')
 594             except OSError as ose:
 595                 if ose.errno == errno.ENOENT:
 596                     self.report_warning(
 597                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 598                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 599                 else:
 600                     raise
 601
 602         if (sys.platform != 'win32'
 603                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 604                 and not params.get('restrictfilenames', False)):
 605             # Unicode filesystem API will throw errors (#1474, #13027)
 606             self.report_warning(
 607                 'Assuming --restrict-filenames since file system encoding '
 608                 'cannot encode all characters. '
 609                 'Set the LC_ALL environment variable to fix this.')
 610             self.params['restrictfilenames'] = True
 611
 612         self.outtmpl_dict = self.parse_outtmpl()
 613
 614         # Creating format selector here allows us to catch syntax errors before the extraction
 615         self.format_selector = (
 616             None if self.params.get('format') is None
 617             else self.build_format_selector(self.params['format']))
 618
 619         self._setup_opener()
 620
 621         if auto_init:
 622             if auto_init != 'no_verbose_header':
 623                 self.print_debug_header()
 624             self.add_default_info_extractors()
 625
 626         for pp_def_raw in self.params.get('postprocessors', []):
 627             pp_def = dict(pp_def_raw)
 628             when = pp_def.pop('when', 'post_process')
 629             pp_class = get_postprocessor(pp_def.pop('key'))
 630             pp = pp_class(self, **compat_kwargs(pp_def))
 631             self.add_post_processor(pp, when=when)
 632
 633         for ph in self.params.get('post_hooks', []):
 634             self.add_post_hook(ph)
 635
 636         for ph in self.params.get('progress_hooks', []):
 637             self.add_progress_hook(ph)
 638
 639         register_socks_protocols()
 640
 641         def preload_download_archive(fn):
 642             """Preload the archive, if any is specified"""
 643             if fn is None:
 644                 return False
 645             self.write_debug(f'Loading archive file {fn!r}')
 646             try:
 647                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 648                     for line in archive_file:
 649                         self.archive.add(line.strip())
 650             except IOError as ioe:
 651                 if ioe.errno != errno.ENOENT:
 652                     raise
 653                 return False
 654             return True
 655
 656         self.archive = set()
 657         preload_download_archive(self.params.get('download_archive'))
 658
 659     def warn_if_short_id(self, argv):
 660         # short YouTube ID starting with dash?
 661         idxs = [
 662             i for i, a in enumerate(argv)
 663             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 664         if idxs:
 665             correct_argv = (
 666                 ['yt-dlp']
 667                 + [a for i, a in enumerate(argv) if i not in idxs]
 668                 + ['--'] + [argv[i] for i in idxs]
 669             )
 670             self.report_warning(
 671                 'Long argument string detected. '
 672                 'Use -- to separate parameters and URLs, like this:\n%s' %
 673                 args_to_str(correct_argv))
 674
 675     def add_info_extractor(self, ie):
 676         """Add an InfoExtractor object to the end of the list."""
 677         ie_key = ie.ie_key()
 678         self._ies[ie_key] = ie
 679         if not isinstance(ie, type):
 680             self._ies_instances[ie_key] = ie
 681             ie.set_downloader(self)
 682
 683     def _get_info_extractor_class(self, ie_key):
 684         ie = self._ies.get(ie_key)
 685         if ie is None:
 686             ie = get_info_extractor(ie_key)
 687             self.add_info_extractor(ie)
 688         return ie
 689
 690     def get_info_extractor(self, ie_key):
 691         """
 692         Get an instance of an IE with name ie_key, it will try to get one from
 693         the _ies list, if there's no instance it will create a new one and add
 694         it to the extractor list.
 695         """
 696         ie = self._ies_instances.get(ie_key)
 697         if ie is None:
 698             ie = get_info_extractor(ie_key)()
 699             self.add_info_extractor(ie)
 700         return ie
 701
 702     def add_default_info_extractors(self):
 703         """
 704         Add the InfoExtractors returned by gen_extractors to the end of the list
 705         """
 706         for ie in gen_extractor_classes():
 707             self.add_info_extractor(ie)
 708
 709     def add_post_processor(self, pp, when='post_process'):
 710         """Add a PostProcessor object to the end of the chain."""
 711         self._pps[when].append(pp)
 712         pp.set_downloader(self)
 713
 714     def add_post_hook(self, ph):
 715         """Add the post hook"""
 716         self._post_hooks.append(ph)
 717
 718     def add_progress_hook(self, ph):
 719         """Add the download progress hook"""
 720         self._progress_hooks.append(ph)
 721
 722     def add_postprocessor_hook(self, ph):
 723         """Add the postprocessing progress hook"""
 724         self._postprocessor_hooks.append(ph)
 725
 726     def _bidi_workaround(self, message):
 727         if not hasattr(self, '_output_channel'):
 728             return message
 729
 730         assert hasattr(self, '_output_process')
 731         assert isinstance(message, compat_str)
 732         line_count = message.count('\n') + 1
 733         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 734         self._output_process.stdin.flush()
 735         res = ''.join(self._output_channel.readline().decode('utf-8')
 736                       for _ in range(line_count))
 737         return res[:-len('\n')]
 738
 739     def _write_string(self, message, out=None, only_once=False):
 740         if only_once:
 741             if message in self._printed_messages:
 742                 return
 743             self._printed_messages.add(message)
 744         write_string(message, out=out, encoding=self.params.get('encoding'))
 745
 746     def to_stdout(self, message, skip_eol=False, quiet=False):
 747         """Print message to stdout"""
 748         if self.params.get('logger'):
 749             self.params['logger'].debug(message)
 750         elif not quiet or self.params.get('verbose'):
 751             self._write_string(
 752                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 753                 self._err_file if quiet else self._screen_file)
 754
 755     def to_stderr(self, message, only_once=False):
 756         """Print message to stderr"""
 757         assert isinstance(message, compat_str)
 758         if self.params.get('logger'):
 759             self.params['logger'].error(message)
 760         else:
 761             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 762
 763     def to_console_title(self, message):
 764         if not self.params.get('consoletitle', False):
 765             return
 766         if compat_os_name == 'nt':
 767             if ctypes.windll.kernel32.GetConsoleWindow():
 768                 # c_wchar_p() might not be necessary if `message` is
 769                 # already of type unicode()
 770                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 771         elif 'TERM' in os.environ:
 772             self._write_string('\033]0;%s\007' % message, self._screen_file)
 773
 774     def save_console_title(self):
 775         if not self.params.get('consoletitle', False):
 776             return
 777         if self.params.get('simulate'):
 778             return
 779         if compat_os_name != 'nt' and 'TERM' in os.environ:
 780             # Save the title on stack
 781             self._write_string('\033[22;0t', self._screen_file)
 782
 783     def restore_console_title(self):
 784         if not self.params.get('consoletitle', False):
 785             return
 786         if self.params.get('simulate'):
 787             return
 788         if compat_os_name != 'nt' and 'TERM' in os.environ:
 789             # Restore the title from stack
 790             self._write_string('\033[23;0t', self._screen_file)
 791
    def __enter__(self):
        """Enter the context manager: save the console title and return self."""
        self.save_console_title()
        return self
 795
 796     def __exit__(self, *args):
 797         self.restore_console_title()
 798
 799         if self.params.get('cookiefile') is not None:
 800             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 801
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        message, if given, is printed to stderr first.
        tb, if given, is additional traceback information; it is only
        printed when the 'verbose' param is set. When tb is None in verbose
        mode, a traceback is built from the exception currently being
        handled, or from the current call stack if there is none.

        Raises DownloadError unless the 'ignoreerrors' param is set, in
        which case the download return code is set to 1 instead.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # An exception carrying its own `exc_info` attribute has
                    # that traceback printed first
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # No active exception: show where trouble() was called from
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors'):
            # Prefer the exc_info stored on the active exception over the
            # active exception's own exc_info
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are being ignored: only record the failure
        self._download_retcode = 1
 832
 833     def to_screen(self, message, skip_eol=False):
 834         """Print message to stdout if not in quiet mode"""
 835         self.to_stdout(
 836             message, skip_eol, quiet=self.params.get('quiet', False))
 837
    class Styles(Enum):
        """Named text styles; a member's value is what __format_text hands to format_text()."""
        # NOTE: Enum members that repeat an earlier value are aliases of the
        # first member with that value, so DELIM is an alias of EMPHASIS and
        # WARNING is an alias of HEADERS. Only the values are used for
        # formatting, so this does not affect the output.
        HEADERS = 'yellow'
        EMPHASIS = 'blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'
 845
 846     def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
 847         assert out in ('screen', 'err')
 848         if test_encoding:
 849             original_text = text
 850             handle = self._screen_file if out == 'screen' else self._err_file
 851             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 852             text = text.encode(encoding, 'ignore').decode(encoding)
 853             if fallback is not None and text != original_text:
 854                 text = fallback
 855         if isinstance(f, self.Styles):
 856             f = f._value_
 857         return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback
 858
    def _format_screen(self, *args, **kwargs):
        """Style text for the 'screen' output; arguments are forwarded to __format_text."""
        return self.__format_text('screen', *args, **kwargs)
 861
    def _format_err(self, *args, **kwargs):
        """Style text for the 'err' output; arguments are forwarded to __format_text."""
        return self.__format_text('err', *args, **kwargs)
 864
 865     def report_warning(self, message, only_once=False):
 866         '''
 867         Print the message to stderr, it will be prefixed with 'WARNING:'
 868         If stderr is a tty file the 'WARNING:' will be colored
 869         '''
 870         if self.params.get('logger') is not None:
 871             self.params['logger'].warning(message)
 872         else:
 873             if self.params.get('no_warnings'):
 874                 return
 875             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 876
 877     def report_error(self, message, tb=None):
 878         '''
 879         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 880         in red if stderr is a tty file.
 881         '''
 882         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
 883
 884     def write_debug(self, message, only_once=False):
 885         '''Log debug message or Print message to stderr'''
 886         if not self.params.get('verbose', False):
 887             return
 888         message = '[debug] %s' % message
 889         if self.params.get('logger'):
 890             self.params['logger'].debug(message)
 891         else:
 892             self.to_stderr(message, only_once)
 893
 894     def report_file_already_downloaded(self, file_name):
 895         """Report file has already been fully downloaded."""
 896         try:
 897             self.to_screen('[download] %s has already been downloaded' % file_name)
 898         except UnicodeEncodeError:
 899             self.to_screen('[download] The file has already been downloaded')
 900
 901     def report_file_delete(self, file_name):
 902         """Report that existing file will be deleted."""
 903         try:
 904             self.to_screen('Deleting existing file %s' % file_name)
 905         except UnicodeEncodeError:
 906             self.to_screen('Deleting existing file')
 907
 908     def raise_no_formats(self, info, forced=False):
 909         has_drm = info.get('__has_drm')
 910         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 911         expected = self.params.get('ignore_no_formats_error')
 912         if forced or not expected:
 913             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 914                                  expected=has_drm or expected)
 915         else:
 916             self.report_warning(msg)
 917
 918     def parse_outtmpl(self):
 919         outtmpl_dict = self.params.get('outtmpl', {})
 920         if not isinstance(outtmpl_dict, dict):
 921             outtmpl_dict = {'default': outtmpl_dict}
 922         # Remove spaces in the default template
 923         if self.params.get('restrictfilenames'):
 924             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 925         else:
 926             sanitize = lambda x: x
 927         outtmpl_dict.update({
 928             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 929             if outtmpl_dict.get(k) is None})
 930         for key, val in outtmpl_dict.items():
 931             if isinstance(val, bytes):
 932                 self.report_warning(
 933                     'Parameter outtmpl is bytes, but should be a unicode string. '
 934                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 935         return outtmpl_dict
 936
 937     def get_output_path(self, dir_type='', filename=None):
 938         paths = self.params.get('paths', {})
 939         assert isinstance(paths, dict)
 940         path = os.path.join(
 941             expand_path(paths.get('home', '').strip()),
 942             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 943             filename or '')
 944
 945         # Temporary fix for #4787
 946         # 'Treat' all problem characters by passing filename through preferredencoding
 947         # to workaround encoding issues with subprocess on python2 @ Windows
 948         if sys.version_info < (3, 0) and sys.platform == 'win32':
 949             path = encodeFilename(path, True).decode(preferredencoding())
 950         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 951
 952     @staticmethod
 953     def _outtmpl_expandpath(outtmpl):
 954         # expand_path translates '%%' into '%' and '$$' into '$'
 955         # correspondingly that is not what we want since we need to keep
 956         # '%%' intact for template dict substitution step. Working around
 957         # with boundary-alike separator hack.
 958         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 959         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 960
 961         # outtmpl should be expand_path'ed before template dict substitution
 962         # because meta fields may contain env variables we don't want to
 963         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 964         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 965         return expand_path(outtmpl).replace(sep, '')
 966
 967     @staticmethod
 968     def escape_outtmpl(outtmpl):
 969         ''' Escape any remaining strings like %s, %abc% etc. '''
 970         return re.sub(
 971             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 972             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 973             outtmpl)
 974
 975     @classmethod
 976     def validate_outtmpl(cls, outtmpl):
 977         ''' @return None or Exception object '''
 978         outtmpl = re.sub(
 979             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
 980             lambda mobj: f'{mobj.group(0)[:-1]}s',
 981             cls._outtmpl_expandpath(outtmpl))
 982         try:
 983             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
 984             return None
 985         except ValueError as err:
 986             return err
 987
 988     @staticmethod
 989     def _copy_infodict(info_dict):
 990         info_dict = dict(info_dict)
 991         for key in ('__original_infodict', '__postprocessors'):
 992             info_dict.pop(key, None)
 993         return info_dict
 994
 995     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 996         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
 997         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
 998
 999         info_dict = self._copy_infodict(info_dict)
1000         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1001             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1002             if info_dict.get('duration', None) is not None
1003             else None)
1004         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1005         if info_dict.get('resolution') is None:
1006             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1007
1008         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1009         # of %(field)s to %(field)0Nd for backward compatibility
1010         field_size_compat_map = {
1011             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1012             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1013             'autonumber': self.params.get('autonumber_size') or 5,
1014         }
1015
1016         TMPL_DICT = {}
1017         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
1018         MATH_FUNCTIONS = {
1019             '+': float.__add__,
1020             '-': float.__sub__,
1021         }
1022         # Field is of the form key1.key2...
1023         # where keys (except first) can be string, int or slice
1024         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1025         MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1026         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1027         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1028             (?P<negate>-)?
1029             (?P<fields>{field})
1030             (?P<maths>(?:{math_op}{math_field})*)
1031             (?:>(?P<strf_format>.+?))?
1032             (?P<alternate>(?<!\\),[^|)]+)?
1033             (?:\|(?P<default>.*?))?
1034             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1035
1036         def _traverse_infodict(k):
1037             k = k.split('.')
1038             if k[0] == '':
1039                 k.pop(0)
1040             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1041
1042         def get_value(mdict):
1043             # Object traversal
1044             value = _traverse_infodict(mdict['fields'])
1045             # Negative
1046             if mdict['negate']:
1047                 value = float_or_none(value)
1048                 if value is not None:
1049                     value *= -1
1050             # Do maths
1051             offset_key = mdict['maths']
1052             if offset_key:
1053                 value = float_or_none(value)
1054                 operator = None
1055                 while offset_key:
1056                     item = re.match(
1057                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1058                         offset_key).group(0)
1059                     offset_key = offset_key[len(item):]
1060                     if operator is None:
1061                         operator = MATH_FUNCTIONS[item]
1062                         continue
1063                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1064                     offset = float_or_none(item)
1065                     if offset is None:
1066                         offset = float_or_none(_traverse_infodict(item))
1067                     try:
1068                         value = operator(value, multiplier * offset)
1069                     except (TypeError, ZeroDivisionError):
1070                         return None
1071                     operator = None
1072             # Datetime formatting
1073             if mdict['strf_format']:
1074                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1075
1076             return value
1077
1078         na = self.params.get('outtmpl_na_placeholder', 'NA')
1079
1080         def _dumpjson_default(obj):
1081             if isinstance(obj, (set, LazyList)):
1082                 return list(obj)
1083             raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
1084
1085         def create_key(outer_mobj):
1086             if not outer_mobj.group('has_key'):
1087                 return outer_mobj.group(0)
1088             key = outer_mobj.group('key')
1089             mobj = re.match(INTERNAL_FORMAT_RE, key)
1090             initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
1091             value, default = None, na
1092             while mobj:
1093                 mobj = mobj.groupdict()
1094                 default = mobj['default'] if mobj['default'] is not None else default
1095                 value = get_value(mobj)
1096                 if value is None and mobj['alternate']:
1097                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1098                 else:
1099                     break
1100
1101             fmt = outer_mobj.group('format')
1102             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1103                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1104
1105             value = default if value is None else value
1106
1107             str_fmt = f'{fmt[:-1]}s'
1108             if fmt[-1] == 'l':  # list
1109                 delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
1110                 value, fmt = delim.join(variadic(value)), str_fmt
1111             elif fmt[-1] == 'j':  # json
1112                 value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
1113             elif fmt[-1] == 'q':  # quoted
1114                 value, fmt = compat_shlex_quote(str(value)), str_fmt
1115             elif fmt[-1] == 'B':  # bytes
1116                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1117                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1118             elif fmt[-1] == 'U':  # unicode normalized
1119                 opts = outer_mobj.group('conversion') or ''
1120                 value, fmt = unicodedata.normalize(
1121                     # "+" = compatibility equivalence, "#" = NFD
1122                     'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
1123                     value), str_fmt
1124             elif fmt[-1] == 'c':
1125                 if value:
1126                     value = str(value)[0]
1127                 else:
1128                     fmt = str_fmt
1129             elif fmt[-1] not in 'rs':  # numeric
1130                 value = float_or_none(value)
1131                 if value is None:
1132                     value, fmt = default, 's'
1133
1134             if sanitize:
1135                 if fmt[-1] == 'r':
1136                     # If value is an object, sanitize might convert it to a string
1137                     # So we convert it to repr first
1138                     value, fmt = repr(value), str_fmt
1139                 if fmt[-1] in 'csr':
1140                     value = sanitize(initial_field, value)
1141
1142             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1143             TMPL_DICT[key] = value
1144             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1145
1146         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1147
1148     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1149         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1150         return self.escape_outtmpl(outtmpl) % info_dict
1151
1152     def _prepare_filename(self, info_dict, tmpl_type='default'):
1153         try:
1154             sanitize = lambda k, v: sanitize_filename(
1155                 compat_str(v),
1156                 restricted=self.params.get('restrictfilenames'),
1157                 is_id=(k == 'id' or k.endswith('_id')))
1158             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1159             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1160
1161             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1162             if filename and force_ext is not None:
1163                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1164
1165             # https://github.com/blackjack4494/youtube-dlc/issues/85
1166             trim_file_name = self.params.get('trim_file_name', False)
1167             if trim_file_name:
1168                 fn_groups = filename.rsplit('.')
1169                 ext = fn_groups[-1]
1170                 sub_ext = ''
1171                 if len(fn_groups) > 2:
1172                     sub_ext = fn_groups[-2]
1173                 filename = join_nonempty(fn_groups[0][:trim_file_name], sub_ext, ext, delim='.')
1174
1175             return filename
1176         except ValueError as err:
1177             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1178             return None
1179
1180     def prepare_filename(self, info_dict, dir_type='', warn=False):
1181         """Generate the output filename."""
1182
1183         filename = self._prepare_filename(info_dict, dir_type or 'default')
1184         if not filename and dir_type not in ('', 'temp'):
1185             return ''
1186
1187         if warn:
1188             if not self.params.get('paths'):
1189                 pass
1190             elif filename == '-':
1191                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1192             elif os.path.isabs(filename):
1193                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1194         if filename == '-' or not filename:
1195             return filename
1196
1197         return self.get_output_path(dir_type, filename)
1198
1199     def _match_entry(self, info_dict, incomplete=False, silent=False):
1200         """ Returns None if the file should be downloaded """
1201
1202         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1203
1204         def check_filter():
1205             if 'title' in info_dict:
1206                 # This can happen when we're just evaluating the playlist
1207                 title = info_dict['title']
1208                 matchtitle = self.params.get('matchtitle', False)
1209                 if matchtitle:
1210                     if not re.search(matchtitle, title, re.IGNORECASE):
1211                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1212                 rejecttitle = self.params.get('rejecttitle', False)
1213                 if rejecttitle:
1214                     if re.search(rejecttitle, title, re.IGNORECASE):
1215                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1216             date = info_dict.get('upload_date')
1217             if date is not None:
1218                 dateRange = self.params.get('daterange', DateRange())
1219                 if date not in dateRange:
1220                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1221             view_count = info_dict.get('view_count')
1222             if view_count is not None:
1223                 min_views = self.params.get('min_views')
1224                 if min_views is not None and view_count < min_views:
1225                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1226                 max_views = self.params.get('max_views')
1227                 if max_views is not None and view_count > max_views:
1228                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1229             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1230                 return 'Skipping "%s" because it is age restricted' % video_title
1231
1232             match_filter = self.params.get('match_filter')
1233             if match_filter is not None:
1234                 try:
1235                     ret = match_filter(info_dict, incomplete=incomplete)
1236                 except TypeError:
1237                     # For backward compatibility
1238                     ret = None if incomplete else match_filter(info_dict)
1239                 if ret is not None:
1240                     return ret
1241             return None
1242
1243         if self.in_download_archive(info_dict):
1244             reason = '%s has already been recorded in the archive' % video_title
1245             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1246         else:
1247             reason = check_filter()
1248             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1249         if reason is not None:
1250             if not silent:
1251                 self.to_screen('[download] ' + reason)
1252             if self.params.get(break_opt, False):
1253                 raise break_err()
1254         return reason
1255
1256     @staticmethod
1257     def add_extra_info(info_dict, extra_info):
1258         '''Set the keys from extra_info in info dict if they are missing'''
1259         for key, value in extra_info.items():
1260             info_dict.setdefault(key, value)
1261
1262     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1263                      process=True, force_generic_extractor=False):
1264         """
1265         Return a list with a dictionary for each video extracted.
1266
1267         Arguments:
1268         url -- URL to extract
1269
1270         Keyword arguments:
1271         download -- whether to download videos during extraction
1272         ie_key -- extractor key hint
1273         extra_info -- dictionary containing the extra values to add to each result
1274         process -- whether to resolve all unresolved references (URLs, playlist items),
1275             must be True for download to work.
1276         force_generic_extractor -- force using the generic extractor
1277         """
1278
1279         if extra_info is None:
1280             extra_info = {}
1281
1282         if not ie_key and force_generic_extractor:
1283             ie_key = 'Generic'
1284
1285         if ie_key:
1286             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1287         else:
1288             ies = self._ies
1289
1290         for ie_key, ie in ies.items():
1291             if not ie.suitable(url):
1292                 continue
1293
1294             if not ie.working():
1295                 self.report_warning('The program functionality for this site has been marked as broken, '
1296                                     'and will probably not work.')
1297
1298             temp_id = ie.get_temp_id(url)
1299             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1300                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1301                                ie_key, temp_id))
1302                 break
1303             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1304         else:
1305             self.report_error('no suitable InfoExtractor for URL %s' % url)
1306
1307     def __handle_extraction_exceptions(func):
1308         @functools.wraps(func)
1309         def wrapper(self, *args, **kwargs):
1310             try:
1311                 return func(self, *args, **kwargs)
1312             except GeoRestrictedError as e:
1313                 msg = e.msg
1314                 if e.countries:
1315                     msg += '\nThis video is available in %s.' % ', '.join(
1316                         map(ISO3166Utils.short2full, e.countries))
1317                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1318                 self.report_error(msg)
1319             except ExtractorError as e:  # An error we somewhat expected
1320                 self.report_error(compat_str(e), e.format_traceback())
1321             except ThrottledDownload:
1322                 self.to_stderr('\r')
1323                 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1324                 return wrapper(self, *args, **kwargs)
1325             except (DownloadCancelled, LazyList.IndexError):
1326                 raise
1327             except Exception as e:
1328                 if self.params.get('ignoreerrors'):
1329                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1330                 else:
1331                     raise
1332         return wrapper
1333
1334     @__handle_extraction_exceptions
1335     def __extract_info(self, url, ie, download, extra_info, process):
1336         ie_result = ie.extract(url)
1337         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1338             return
1339         if isinstance(ie_result, list):
1340             # Backwards compatibility: old IE result format
1341             ie_result = {
1342                 '_type': 'compat_list',
1343                 'entries': ie_result,
1344             }
1345         if extra_info.get('original_url'):
1346             ie_result.setdefault('original_url', extra_info['original_url'])
1347         self.add_default_extra_info(ie_result, ie, url)
1348         if process:
1349             return self.process_ie_result(ie_result, download, extra_info)
1350         else:
1351             return ie_result
1352
1353     def add_default_extra_info(self, ie_result, ie, url):
1354         if url is not None:
1355             self.add_extra_info(ie_result, {
1356                 'webpage_url': url,
1357                 'original_url': url,
1358                 'webpage_url_basename': url_basename(url),
1359             })
1360         if ie is not None:
1361             self.add_extra_info(ie_result, {
1362                 'extractor': ie.IE_NAME,
1363                 'extractor_key': ie.ie_key(),
1364             })
1365
1366     def process_ie_result(self, ie_result, download=True, extra_info=None):
1367         """
1368         Take the result of the ie(may be modified) and resolve all unresolved
1369         references (URLs, playlist items).
1370
1371         It will also download the videos if 'download'.
1372         Returns the resolved ie_result.
1373         """
1374         if extra_info is None:
1375             extra_info = {}
1376         result_type = ie_result.get('_type', 'video')
1377
1378         if result_type in ('url', 'url_transparent'):
1379             ie_result['url'] = sanitize_url(ie_result['url'])
1380             if ie_result.get('original_url'):
1381                 extra_info.setdefault('original_url', ie_result['original_url'])
1382
1383             extract_flat = self.params.get('extract_flat', False)
1384             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1385                     or extract_flat is True):
1386                 info_copy = ie_result.copy()
1387                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1388                 if ie and not ie_result.get('id'):
1389                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1390                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1391                 self.add_extra_info(info_copy, extra_info)
1392                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1393                 if self.params.get('force_write_download_archive', False):
1394                     self.record_download_archive(info_copy)
1395                 return ie_result
1396
1397         if result_type == 'video':
1398             self.add_extra_info(ie_result, extra_info)
1399             ie_result = self.process_video_result(ie_result, download=download)
1400             additional_urls = (ie_result or {}).get('additional_urls')
1401             if additional_urls:
1402                 # TODO: Improve MetadataParserPP to allow setting a list
1403                 if isinstance(additional_urls, compat_str):
1404                     additional_urls = [additional_urls]
1405                 self.to_screen(
1406                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1407                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1408                 ie_result['additional_entries'] = [
1409                     self.extract_info(
1410                         url, download, extra_info,
1411                         force_generic_extractor=self.params.get('force_generic_extractor'))
1412                     for url in additional_urls
1413                 ]
1414             return ie_result
1415         elif result_type == 'url':
1416             # We have to add extra_info to the results because it may be
1417             # contained in a playlist
1418             return self.extract_info(
1419                 ie_result['url'], download,
1420                 ie_key=ie_result.get('ie_key'),
1421                 extra_info=extra_info)
1422         elif result_type == 'url_transparent':
1423             # Use the information from the embedding page
1424             info = self.extract_info(
1425                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1426                 extra_info=extra_info, download=False, process=False)
1427
1428             # extract_info may return None when ignoreerrors is enabled and
1429             # extraction failed with an error, don't crash and return early
1430             # in this case
1431             if not info:
1432                 return info
1433
1434             force_properties = dict(
1435                 (k, v) for k, v in ie_result.items() if v is not None)
1436             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1437                 if f in force_properties:
1438                     del force_properties[f]
1439             new_result = info.copy()
1440             new_result.update(force_properties)
1441
1442             # Extracted info may not be a video result (i.e.
1443             # info.get('_type', 'video') != video) but rather an url or
1444             # url_transparent. In such cases outer metadata (from ie_result)
1445             # should be propagated to inner one (info). For this to happen
1446             # _type of info should be overridden with url_transparent. This
1447             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1448             if new_result.get('_type') == 'url':
1449                 new_result['_type'] = 'url_transparent'
1450
1451             return self.process_ie_result(
1452                 new_result, download=download, extra_info=extra_info)
1453         elif result_type in ('playlist', 'multi_video'):
1454             # Protect from infinite recursion due to recursively nested playlists
1455             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1456             webpage_url = ie_result['webpage_url']
1457             if webpage_url in self._playlist_urls:
1458                 self.to_screen(
1459                     '[download] Skipping already downloaded playlist: %s'
1460                     % ie_result.get('title') or ie_result.get('id'))
1461                 return
1462
1463             self._playlist_level += 1
1464             self._playlist_urls.add(webpage_url)
1465             self._sanitize_thumbnails(ie_result)
1466             try:
1467                 return self.__process_playlist(ie_result, download)
1468             finally:
1469                 self._playlist_level -= 1
1470                 if not self._playlist_level:
1471                     self._playlist_urls.clear()
1472         elif result_type == 'compat_list':
1473             self.report_warning(
1474                 'Extractor %s returned a compat_list result. '
1475                 'It needs to be updated.' % ie_result.get('extractor'))
1476
1477             def _fixup(r):
1478                 self.add_extra_info(r, {
1479                     'extractor': ie_result['extractor'],
1480                     'webpage_url': ie_result['webpage_url'],
1481                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1482                     'extractor_key': ie_result['extractor_key'],
1483                 })
1484                 return r
1485             ie_result['entries'] = [
1486                 self.process_ie_result(_fixup(r), download, extra_info)
1487                 for r in ie_result['entries']
1488             ]
1489             return ie_result
1490         else:
1491             raise Exception('Invalid result type: %s' % result_type)
1492
1493     def _ensure_dir_exists(self, path):
1494         return make_dir(path, self.report_error)
1495
1496     def __process_playlist(self, ie_result, download):
1497         # We process each entry in the playlist
1498         playlist = ie_result.get('title') or ie_result.get('id')
1499         self.to_screen('[download] Downloading playlist: %s' % playlist)
1500
1501         if 'entries' not in ie_result:
1502             raise EntryNotInPlaylist()
1503         incomplete_entries = bool(ie_result.get('requested_entries'))
1504         if incomplete_entries:
1505             def fill_missing_entries(entries, indexes):
1506                 ret = [None] * max(*indexes)
1507                 for i, entry in zip(indexes, entries):
1508                     ret[i - 1] = entry
1509                 return ret
1510             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1511
1512         playlist_results = []
1513
1514         playliststart = self.params.get('playliststart', 1)
1515         playlistend = self.params.get('playlistend')
1516         # For backwards compatibility, interpret -1 as whole list
1517         if playlistend == -1:
1518             playlistend = None
1519
1520         playlistitems_str = self.params.get('playlist_items')
1521         playlistitems = None
1522         if playlistitems_str is not None:
1523             def iter_playlistitems(format):
1524                 for string_segment in format.split(','):
1525                     if '-' in string_segment:
1526                         start, end = string_segment.split('-')
1527                         for item in range(int(start), int(end) + 1):
1528                             yield int(item)
1529                     else:
1530                         yield int(string_segment)
1531             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1532
1533         ie_entries = ie_result['entries']
1534         msg = (
1535             'Downloading %d videos' if not isinstance(ie_entries, list)
1536             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1537
1538         if isinstance(ie_entries, list):
1539             def get_entry(i):
1540                 return ie_entries[i - 1]
1541         else:
1542             if not isinstance(ie_entries, PagedList):
1543                 ie_entries = LazyList(ie_entries)
1544
1545             def get_entry(i):
1546                 return YoutubeDL.__handle_extraction_exceptions(
1547                     lambda self, i: ie_entries[i - 1]
1548                 )(self, i)
1549
1550         entries = []
1551         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1552         for i in items:
1553             if i == 0:
1554                 continue
1555             if playlistitems is None and playlistend is not None and playlistend < i:
1556                 break
1557             entry = None
1558             try:
1559                 entry = get_entry(i)
1560                 if entry is None:
1561                     raise EntryNotInPlaylist()
1562             except (IndexError, EntryNotInPlaylist):
1563                 if incomplete_entries:
1564                     raise EntryNotInPlaylist()
1565                 elif not playlistitems:
1566                     break
1567             entries.append(entry)
1568             try:
1569                 if entry is not None:
1570                     self._match_entry(entry, incomplete=True, silent=True)
1571             except (ExistingVideoReached, RejectedVideoReached):
1572                 break
1573         ie_result['entries'] = entries
1574
1575         # Save playlist_index before re-ordering
1576         entries = [
1577             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1578             for i, entry in enumerate(entries, 1)
1579             if entry is not None]
1580         n_entries = len(entries)
1581
1582         if not playlistitems and (playliststart or playlistend):
1583             playlistitems = list(range(playliststart, playliststart + n_entries))
1584         ie_result['requested_entries'] = playlistitems
1585
1586         if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
1587             ie_copy = {
1588                 'playlist': playlist,
1589                 'playlist_id': ie_result.get('id'),
1590                 'playlist_title': ie_result.get('title'),
1591                 'playlist_uploader': ie_result.get('uploader'),
1592                 'playlist_uploader_id': ie_result.get('uploader_id'),
1593                 'playlist_index': 0,
1594                 'n_entries': n_entries,
1595             }
1596             ie_copy.update(dict(ie_result))
1597
1598             if self._write_info_json('playlist', ie_result,
1599                                      self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1600                 return
1601             if self._write_description('playlist', ie_result,
1602                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1603                 return
1604             # TODO: This should be passed to ThumbnailsConvertor if necessary
1605             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1606
1607         if self.params.get('playlistreverse', False):
1608             entries = entries[::-1]
1609         if self.params.get('playlistrandom', False):
1610             random.shuffle(entries)
1611
1612         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1613
1614         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1615         failures = 0
1616         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1617         for i, entry_tuple in enumerate(entries, 1):
1618             playlist_index, entry = entry_tuple
1619             if 'playlist-index' in self.params.get('compat_opts', []):
1620                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1621             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1622             # This __x_forwarded_for_ip thing is a bit ugly but requires
1623             # minimal changes
1624             if x_forwarded_for:
1625                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1626             extra = {
1627                 'n_entries': n_entries,
1628                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1629                 'playlist_index': playlist_index,
1630                 'playlist_autonumber': i,
1631                 'playlist': playlist,
1632                 'playlist_id': ie_result.get('id'),
1633                 'playlist_title': ie_result.get('title'),
1634                 'playlist_uploader': ie_result.get('uploader'),
1635                 'playlist_uploader_id': ie_result.get('uploader_id'),
1636                 'extractor': ie_result['extractor'],
1637                 'webpage_url': ie_result['webpage_url'],
1638                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1639                 'extractor_key': ie_result['extractor_key'],
1640             }
1641
1642             if self._match_entry(entry, incomplete=True) is not None:
1643                 continue
1644
1645             entry_result = self.__process_iterable_entry(entry, download, extra)
1646             if not entry_result:
1647                 failures += 1
1648             if failures >= max_failures:
1649                 self.report_error(
1650                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1651                 break
1652             # TODO: skip failed (empty) entries?
1653             playlist_results.append(entry_result)
1654         ie_result['entries'] = playlist_results
1655         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1656         return ie_result
1657
1658     @__handle_extraction_exceptions
1659     def __process_iterable_entry(self, entry, download, extra_info):
1660         return self.process_ie_result(
1661             entry, download=download, extra_info=extra_info)
1662
1663     def _build_format_filter(self, filter_spec):
1664         " Returns a function to filter the formats according to the filter_spec "
1665
1666         OPERATORS = {
1667             '<': operator.lt,
1668             '<=': operator.le,
1669             '>': operator.gt,
1670             '>=': operator.ge,
1671             '=': operator.eq,
1672             '!=': operator.ne,
1673         }
1674         operator_rex = re.compile(r'''(?x)\s*
1675             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1676             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1677             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1678             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1679         m = operator_rex.fullmatch(filter_spec)
1680         if m:
1681             try:
1682                 comparison_value = int(m.group('value'))
1683             except ValueError:
1684                 comparison_value = parse_filesize(m.group('value'))
1685                 if comparison_value is None:
1686                     comparison_value = parse_filesize(m.group('value') + 'B')
1687                 if comparison_value is None:
1688                     raise ValueError(
1689                         'Invalid value %r in format specification %r' % (
1690                             m.group('value'), filter_spec))
1691             op = OPERATORS[m.group('op')]
1692
1693         if not m:
1694             STR_OPERATORS = {
1695                 '=': operator.eq,
1696                 '^=': lambda attr, value: attr.startswith(value),
1697                 '$=': lambda attr, value: attr.endswith(value),
1698                 '*=': lambda attr, value: value in attr,
1699             }
1700             str_operator_rex = re.compile(r'''(?x)\s*
1701                 (?P<key>[a-zA-Z0-9._-]+)\s*
1702                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1703                 (?P<value>[a-zA-Z0-9._-]+)\s*
1704                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1705             m = str_operator_rex.fullmatch(filter_spec)
1706             if m:
1707                 comparison_value = m.group('value')
1708                 str_op = STR_OPERATORS[m.group('op')]
1709                 if m.group('negation'):
1710                     op = lambda attr, value: not str_op(attr, value)
1711                 else:
1712                     op = str_op
1713
1714         if not m:
1715             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1716
1717         def _filter(f):
1718             actual_value = f.get(m.group('key'))
1719             if actual_value is None:
1720                 return m.group('none_inclusive')
1721             return op(actual_value, comparison_value)
1722         return _filter
1723
1724     def _check_formats(self, formats):
1725         for f in formats:
1726             self.to_screen('[info] Testing format %s' % f['format_id'])
1727             temp_file = tempfile.NamedTemporaryFile(
1728                 suffix='.tmp', delete=False,
1729                 dir=self.get_output_path('temp') or None)
1730             temp_file.close()
1731             try:
1732                 success, _ = self.dl(temp_file.name, f, test=True)
1733             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1734                 success = False
1735             finally:
1736                 if os.path.exists(temp_file.name):
1737                     try:
1738                         os.remove(temp_file.name)
1739                     except OSError:
1740                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1741             if success:
1742                 yield f
1743             else:
1744                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1745
1746     def _default_format_spec(self, info_dict, download=True):
1747
1748         def can_merge():
1749             merger = FFmpegMergerPP(self)
1750             return merger.available and merger.can_merge()
1751
1752         prefer_best = (
1753             not self.params.get('simulate')
1754             and download
1755             and (
1756                 not can_merge()
1757                 or info_dict.get('is_live', False)
1758                 or self.outtmpl_dict['default'] == '-'))
1759         compat = (
1760             prefer_best
1761             or self.params.get('allow_multiple_audio_streams', False)
1762             or 'format-spec' in self.params.get('compat_opts', []))
1763
1764         return (
1765             'best/bestvideo+bestaudio' if prefer_best
1766             else 'bestvideo*+bestaudio/best' if not compat
1767             else 'bestvideo+bestaudio/best')
1768
1769     def build_format_selector(self, format_spec):
1770         def syntax_error(note, start):
1771             message = (
1772                 'Invalid format specification: '
1773                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1774             return SyntaxError(message)
1775
1776         PICKFIRST = 'PICKFIRST'
1777         MERGE = 'MERGE'
1778         SINGLE = 'SINGLE'
1779         GROUP = 'GROUP'
1780         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1781
1782         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1783                                   'video': self.params.get('allow_multiple_video_streams', False)}
1784
1785         check_formats = self.params.get('check_formats') == 'selected'
1786
1787         def _parse_filter(tokens):
1788             filter_parts = []
1789             for type, string, start, _, _ in tokens:
1790                 if type == tokenize.OP and string == ']':
1791                     return ''.join(filter_parts)
1792                 else:
1793                     filter_parts.append(string)
1794
1795         def _remove_unused_ops(tokens):
1796             # Remove operators that we don't use and join them with the surrounding strings
1797             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1798             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1799             last_string, last_start, last_end, last_line = None, None, None, None
1800             for type, string, start, end, line in tokens:
1801                 if type == tokenize.OP and string == '[':
1802                     if last_string:
1803                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1804                         last_string = None
1805                     yield type, string, start, end, line
1806                     # everything inside brackets will be handled by _parse_filter
1807                     for type, string, start, end, line in tokens:
1808                         yield type, string, start, end, line
1809                         if type == tokenize.OP and string == ']':
1810                             break
1811                 elif type == tokenize.OP and string in ALLOWED_OPS:
1812                     if last_string:
1813                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1814                         last_string = None
1815                     yield type, string, start, end, line
1816                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1817                     if not last_string:
1818                         last_string = string
1819                         last_start = start
1820                         last_end = end
1821                     else:
1822                         last_string += string
1823             if last_string:
1824                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1825
1826         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1827             selectors = []
1828             current_selector = None
1829             for type, string, start, _, _ in tokens:
1830                 # ENCODING is only defined in python 3.x
1831                 if type == getattr(tokenize, 'ENCODING', None):
1832                     continue
1833                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1834                     current_selector = FormatSelector(SINGLE, string, [])
1835                 elif type == tokenize.OP:
1836                     if string == ')':
1837                         if not inside_group:
1838                             # ')' will be handled by the parentheses group
1839                             tokens.restore_last_token()
1840                         break
1841                     elif inside_merge and string in ['/', ',']:
1842                         tokens.restore_last_token()
1843                         break
1844                     elif inside_choice and string == ',':
1845                         tokens.restore_last_token()
1846                         break
1847                     elif string == ',':
1848                         if not current_selector:
1849                             raise syntax_error('"," must follow a format selector', start)
1850                         selectors.append(current_selector)
1851                         current_selector = None
1852                     elif string == '/':
1853                         if not current_selector:
1854                             raise syntax_error('"/" must follow a format selector', start)
1855                         first_choice = current_selector
1856                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1857                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1858                     elif string == '[':
1859                         if not current_selector:
1860                             current_selector = FormatSelector(SINGLE, 'best', [])
1861                         format_filter = _parse_filter(tokens)
1862                         current_selector.filters.append(format_filter)
1863                     elif string == '(':
1864                         if current_selector:
1865                             raise syntax_error('Unexpected "("', start)
1866                         group = _parse_format_selection(tokens, inside_group=True)
1867                         current_selector = FormatSelector(GROUP, group, [])
1868                     elif string == '+':
1869                         if not current_selector:
1870                             raise syntax_error('Unexpected "+"', start)
1871                         selector_1 = current_selector
1872                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1873                         if not selector_2:
1874                             raise syntax_error('Expected a selector', start)
1875                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1876                     else:
1877                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1878                 elif type == tokenize.ENDMARKER:
1879                     break
1880             if current_selector:
1881                 selectors.append(current_selector)
1882             return selectors
1883
1884         def _merge(formats_pair):
1885             format_1, format_2 = formats_pair
1886
1887             formats_info = []
1888             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1889             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1890
1891             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1892                 get_no_more = {'video': False, 'audio': False}
1893                 for (i, fmt_info) in enumerate(formats_info):
1894                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1895                         formats_info.pop(i)
1896                         continue
1897                     for aud_vid in ['audio', 'video']:
1898                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1899                             if get_no_more[aud_vid]:
1900                                 formats_info.pop(i)
1901                                 break
1902                             get_no_more[aud_vid] = True
1903
1904             if len(formats_info) == 1:
1905                 return formats_info[0]
1906
1907             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1908             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1909
1910             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1911             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1912
1913             output_ext = self.params.get('merge_output_format')
1914             if not output_ext:
1915                 if the_only_video:
1916                     output_ext = the_only_video['ext']
1917                 elif the_only_audio and not video_fmts:
1918                     output_ext = the_only_audio['ext']
1919                 else:
1920                     output_ext = 'mkv'
1921
1922             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1923
1924             new_dict = {
1925                 'requested_formats': formats_info,
1926                 'format': '+'.join(filtered('format')),
1927                 'format_id': '+'.join(filtered('format_id')),
1928                 'ext': output_ext,
1929                 'protocol': '+'.join(map(determine_protocol, formats_info)),
1930                 'language': '+'.join(orderedSet(filtered('language'))),
1931                 'format_note': '+'.join(orderedSet(filtered('format_note'))),
1932                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
1933                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
1934             }
1935
1936             if the_only_video:
1937                 new_dict.update({
1938                     'width': the_only_video.get('width'),
1939                     'height': the_only_video.get('height'),
1940                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1941                     'fps': the_only_video.get('fps'),
1942                     'dynamic_range': the_only_video.get('dynamic_range'),
1943                     'vcodec': the_only_video.get('vcodec'),
1944                     'vbr': the_only_video.get('vbr'),
1945                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1946                 })
1947
1948             if the_only_audio:
1949                 new_dict.update({
1950                     'acodec': the_only_audio.get('acodec'),
1951                     'abr': the_only_audio.get('abr'),
1952                     'asr': the_only_audio.get('asr'),
1953                 })
1954
1955             return new_dict
1956
1957         def _check_formats(formats):
1958             if not check_formats:
1959                 yield from formats
1960                 return
1961             yield from self._check_formats(formats)
1962
        def _build_selector_function(selector):
            """Recursively compile a parsed selector into a function of the selection context.

            `selector` is either a list of selectors (separated by "," in the
            format spec) or a single FormatSelector. The returned function takes
            a context dict with the keys 'formats' and 'incomplete_formats' and
            produces the selected format dicts.
            """
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                # A list carries no filters of its own, so it is returned directly
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # Use the first alternative that selects anything at all
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Each side works on its own deep copy of the context; every
                    # combination of their results is merged
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        # Fold all formats into a single merged one, starting
                        # from the last format and walking towards the first
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # best/worst (b/w), optionally restricted to video/audio (v/a),
                    # optionally with "*" and a ".N" suffix picking the N-th match
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        # In every case, formats with neither audio nor video are excluded
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not a best/worst spec: match by known extension, else by format id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        # Walk the matches from the end for "best" and from the start
                        # for "worst"; the lazy list lets checking stop at the
                        # requested index
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply this selector's filters to a copy so that the caller's
                # context is left untouched
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
2065
        # Tokenize the UTF-8 encoded format spec; a tokenizer error is reported
        # as a syntax error spanning the whole spec
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2071
2072         class TokenIterator(object):
2073             def __init__(self, tokens):
2074                 self.tokens = tokens
2075                 self.counter = 0
2076
2077             def __iter__(self):
2078                 return self
2079
2080             def __next__(self):
2081                 if self.counter >= len(self.tokens):
2082                     raise StopIteration()
2083                 value = self.tokens[self.counter]
2084                 self.counter += 1
2085                 return value
2086
2087             next = __next__
2088
2089             def restore_last_token(self):
2090                 self.counter -= 1
2091
        # Parse the tokens into selectors, then build the function that applies them
        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
2094
2095     def _calc_headers(self, info_dict):
2096         res = std_headers.copy()
2097
2098         add_headers = info_dict.get('http_headers')
2099         if add_headers:
2100             res.update(add_headers)
2101
2102         cookies = self._calc_cookies(info_dict)
2103         if cookies:
2104             res['Cookie'] = cookies
2105
2106         if 'X-Forwarded-For' not in res:
2107             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2108             if x_forwarded_for_ip:
2109                 res['X-Forwarded-For'] = x_forwarded_for_ip
2110
2111         return res
2112
2113     def _calc_cookies(self, info_dict):
2114         pr = sanitized_Request(info_dict['url'])
2115         self.cookiejar.add_cookie_header(pr)
2116         return pr.get_header('Cookie')
2117
2118     def _sort_thumbnails(self, thumbnails):
2119         thumbnails.sort(key=lambda t: (
2120             t.get('preference') if t.get('preference') is not None else -1,
2121             t.get('width') if t.get('width') is not None else -1,
2122             t.get('height') if t.get('height') is not None else -1,
2123             t.get('id') if t.get('id') is not None else '',
2124             t.get('url')))
2125
2126     def _sanitize_thumbnails(self, info_dict):
2127         thumbnails = info_dict.get('thumbnails')
2128         if thumbnails is None:
2129             thumbnail = info_dict.get('thumbnail')
2130             if thumbnail:
2131                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2132         if not thumbnails:
2133             return
2134
2135         def check_thumbnails(thumbnails):
2136             for t in thumbnails:
2137                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2138                 try:
2139                     self.urlopen(HEADRequest(t['url']))
2140                 except network_exceptions as err:
2141                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2142                     continue
2143                 yield t
2144
2145         self._sort_thumbnails(thumbnails)
2146         for i, t in enumerate(thumbnails):
2147             if t.get('id') is None:
2148                 t['id'] = '%d' % i
2149             if t.get('width') and t.get('height'):
2150                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2151             t['url'] = sanitize_url(t['url'])
2152
2153         if self.params.get('check_formats') is True:
2154             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse()
2155         else:
2156             info_dict['thumbnails'] = thumbnails
2157
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video result, select the requested formats and process them.

        `info_dict` is modified in place: missing fields are derived,
        thumbnails, subtitles and formats are normalized, and finally the
        dict is updated with the last selected format.

        Returns the info dict, or None when only listing of thumbnails,
        formats or subtitles was requested. Each selected format is passed to
        `self.process_info` if `download` is true.

        Raises ExtractorError if the 'id' or 'title' field is missing, or if no
        format matches the request and 'ignore_no_formats_error' is not set.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Convert a non-string value to a string, with a warning
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Convert non-numeric values of the known numeric fields, with a warning
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Prefer the explicit 'thumbnail'; otherwise use the last of the sorted thumbnails
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive the missing date fields from their timestamps
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Derive 'live_status' from 'is_live'/'was_live' when it is not set, then
        # fill in whichever of those two flags are still unknown
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in missing extensions from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Remember whether any format has DRM, then drop those formats unless
        # unplayable formats are allowed
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the list of formats that share it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in the derived fields that are still missing from each format
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            # Estimate the size from duration and 'tbr' when no size is known
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if self.params.get('check_formats') is True:
            # Check lazily, starting from the end of the list, and restore the order
            formats = LazyList(self._check_formats(formats[::-1])).reverse()

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        # Listing stops processing here only when 'simulate' was left unset (None)
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                # Each selected format is processed with its own copy of the info dict
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2435
2436     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2437         """Select the requested subtitles and their format"""
2438         available_subs = {}
2439         if normal_subtitles and self.params.get('writesubtitles'):
2440             available_subs.update(normal_subtitles)
2441         if automatic_captions and self.params.get('writeautomaticsub'):
2442             for lang, cap_info in automatic_captions.items():
2443                 if lang not in available_subs:
2444                     available_subs[lang] = cap_info
2445
2446         if (not self.params.get('writesubtitles') and not
2447                 self.params.get('writeautomaticsub') or not
2448                 available_subs):
2449             return None
2450
2451         all_sub_langs = available_subs.keys()
2452         if self.params.get('allsubtitles', False):
2453             requested_langs = all_sub_langs
2454         elif self.params.get('subtitleslangs', False):
2455             # A list is used so that the order of languages will be the same as
2456             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2457             requested_langs = []
2458             for lang_re in self.params.get('subtitleslangs'):
2459                 if lang_re == 'all':
2460                     requested_langs.extend(all_sub_langs)
2461                     continue
2462                 discard = lang_re[0] == '-'
2463                 if discard:
2464                     lang_re = lang_re[1:]
2465                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2466                 if discard:
2467                     for lang in current_langs:
2468                         while lang in requested_langs:
2469                             requested_langs.remove(lang)
2470                 else:
2471                     requested_langs.extend(current_langs)
2472             requested_langs = orderedSet(requested_langs)
2473         elif 'en' in available_subs:
2474             requested_langs = ['en']
2475         else:
2476             requested_langs = [list(all_sub_langs)[0]]
2477         if requested_langs:
2478             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2479
2480         formats_query = self.params.get('subtitlesformat', 'best')
2481         formats_preference = formats_query.split('/') if formats_query else []
2482         subs = {}
2483         for lang in requested_langs:
2484             formats = available_subs.get(lang)
2485             if formats is None:
2486                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2487                 continue
2488             for ext in formats_preference:
2489                 if ext == 'best':
2490                     f = formats[-1]
2491                     break
2492                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2493                 if matches:
2494                     f = matches[-1]
2495                     break
2496             else:
2497                 f = formats[-1]
2498                 self.report_warning(
2499                     'No subtitle format found matching "%s" for language %s, '
2500                     'using %s' % (formats_query, lang, f['ext']))
2501             subs[lang] = f
2502         return subs
2503
2504     def __forced_printings(self, info_dict, filename, incomplete):
2505         def print_mandatory(field, actual_field=None):
2506             if actual_field is None:
2507                 actual_field = field
2508             if (self.params.get('force%s' % field, False)
2509                     and (not incomplete or info_dict.get(actual_field) is not None)):
2510                 self.to_stdout(info_dict[actual_field])
2511
2512         def print_optional(field):
2513             if (self.params.get('force%s' % field, False)
2514                     and info_dict.get(field) is not None):
2515                 self.to_stdout(info_dict[field])
2516
2517         info_dict = info_dict.copy()
2518         if filename is not None:
2519             info_dict['filename'] = filename
2520         if info_dict.get('requested_formats') is not None:
2521             # For RTMP URLs, also include the playpath
2522             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2523         elif 'url' in info_dict:
2524             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2525
2526         if self.params.get('forceprint') or self.params.get('forcejson'):
2527             self.post_extract(info_dict)
2528         for tmpl in self.params.get('forceprint', []):
2529             mobj = re.match(r'\w+(=?)$', tmpl)
2530             if mobj and mobj.group(1):
2531                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2532             elif mobj:
2533                 tmpl = '%({})s'.format(tmpl)
2534             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2535
2536         print_mandatory('title')
2537         print_mandatory('id')
2538         print_mandatory('url', 'urls')
2539         print_optional('thumbnail')
2540         print_optional('description')
2541         print_optional('filename')
2542         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2543             self.to_stdout(formatSeconds(info_dict['duration']))
2544         print_mandatory('format')
2545
2546         if self.params.get('forcejson'):
2547             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2548
2549     def dl(self, name, info, subtitle=False, test=False):
2550         if not info.get('url'):
2551             self.raise_no_formats(info, True)
2552
2553         if test:
2554             verbose = self.params.get('verbose')
2555             params = {
2556                 'test': True,
2557                 'quiet': self.params.get('quiet') or not verbose,
2558                 'verbose': verbose,
2559                 'noprogress': not verbose,
2560                 'nopart': True,
2561                 'skip_unavailable_fragments': False,
2562                 'keep_fragments': False,
2563                 'overwrites': True,
2564                 '_no_ytdl_file': True,
2565             }
2566         else:
2567             params = self.params
2568         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2569         if not test:
2570             for ph in self._progress_hooks:
2571                 fd.add_progress_hook(ph)
2572             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2573             self.write_debug('Invoking downloader on "%s"' % urls)
2574
2575         new_info = copy.deepcopy(self._copy_infodict(info))
2576         if new_info.get('http_headers') is None:
2577             new_info['http_headers'] = self._calc_headers(new_info)
2578         return fd.download(name, new_info, subtitle)
2579
2580     def process_info(self, info_dict):
2581         """Process a single resolved IE result."""
2582
2583         assert info_dict.get('_type', 'video') == 'video'
2584
2585         max_downloads = self.params.get('max_downloads')
2586         if max_downloads is not None:
2587             if self._num_downloads >= int(max_downloads):
2588                 raise MaxDownloadsReached()
2589
2590         # TODO: backward compatibility, to be removed
2591         info_dict['fulltitle'] = info_dict['title']
2592
2593         if 'format' not in info_dict and 'ext' in info_dict:
2594             info_dict['format'] = info_dict['ext']
2595
2596         if self._match_entry(info_dict) is not None:
2597             return
2598
2599         self.post_extract(info_dict)
2600         self._num_downloads += 1
2601
2602         # info_dict['_filename'] needs to be set for backward compatibility
2603         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2604         temp_filename = self.prepare_filename(info_dict, 'temp')
2605         files_to_move = {}
2606
2607         # Forced printings
2608         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2609
2610         if self.params.get('simulate'):
2611             if self.params.get('force_write_download_archive', False):
2612                 self.record_download_archive(info_dict)
2613             # Do nothing else if in simulate mode
2614             return
2615
2616         if full_filename is None:
2617             return
2618         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2619             return
2620         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2621             return
2622
2623         if self._write_description('video', info_dict,
2624                                    self.prepare_filename(info_dict, 'description')) is None:
2625             return
2626
2627         sub_files = self._write_subtitles(info_dict, temp_filename)
2628         if sub_files is None:
2629             return
2630         files_to_move.update(dict(sub_files))
2631
2632         thumb_files = self._write_thumbnails(
2633             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2634         if thumb_files is None:
2635             return
2636         files_to_move.update(dict(thumb_files))
2637
2638         infofn = self.prepare_filename(info_dict, 'infojson')
2639         _infojson_written = self._write_info_json('video', info_dict, infofn)
2640         if _infojson_written:
2641             info_dict['__infojson_filename'] = infofn
2642         elif _infojson_written is None:
2643             return
2644
2645         # Note: Annotations are deprecated
2646         annofn = None
2647         if self.params.get('writeannotations', False):
2648             annofn = self.prepare_filename(info_dict, 'annotation')
2649         if annofn:
2650             if not self._ensure_dir_exists(encodeFilename(annofn)):
2651                 return
2652             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2653                 self.to_screen('[info] Video annotations are already present')
2654             elif not info_dict.get('annotations'):
2655                 self.report_warning('There are no annotations to write.')
2656             else:
2657                 try:
2658                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2659                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2660                         annofile.write(info_dict['annotations'])
2661                 except (KeyError, TypeError):
2662                     self.report_warning('There are no annotations to write.')
2663                 except (OSError, IOError):
2664                     self.report_error('Cannot write annotations file: ' + annofn)
2665                     return
2666
2667         # Write internet shortcut files
2668         def _write_link_file(link_type):
2669             if 'webpage_url' not in info_dict:
2670                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2671                 return False
2672             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2673             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2674                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2675                 return True
2676             try:
2677                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2678                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2679                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2680                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2681                     if link_type == 'desktop':
2682                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2683                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2684             except (OSError, IOError):
2685                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2686                 return False
2687             return True
2688
2689         write_links = {
2690             'url': self.params.get('writeurllink'),
2691             'webloc': self.params.get('writewebloclink'),
2692             'desktop': self.params.get('writedesktoplink'),
2693         }
2694         if self.params.get('writelink'):
2695             link_type = ('webloc' if sys.platform == 'darwin'
2696                          else 'desktop' if sys.platform.startswith('linux')
2697                          else 'url')
2698             write_links[link_type] = True
2699
2700         if any(should_write and not _write_link_file(link_type)
2701                for link_type, should_write in write_links.items()):
2702             return
2703
2704         try:
2705             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2706         except PostProcessingError as err:
2707             self.report_error('Preprocessing: %s' % str(err))
2708             return
2709
2710         must_record_download_archive = False
2711         if self.params.get('skip_download', False):
2712             info_dict['filepath'] = temp_filename
2713             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2714             info_dict['__files_to_move'] = files_to_move
2715             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2716         else:
2717             # Download
2718             info_dict.setdefault('__postprocessors', [])
2719             try:
2720
2721                 def existing_file(*filepaths):
2722                     ext = info_dict.get('ext')
2723                     final_ext = self.params.get('final_ext', ext)
2724                     existing_files = []
2725                     for file in orderedSet(filepaths):
2726                         if final_ext != ext:
2727                             converted = replace_extension(file, final_ext, ext)
2728                             if os.path.exists(encodeFilename(converted)):
2729                                 existing_files.append(converted)
2730                         if os.path.exists(encodeFilename(file)):
2731                             existing_files.append(file)
2732
2733                     if not existing_files or self.params.get('overwrites', False):
2734                         for file in orderedSet(existing_files):
2735                             self.report_file_delete(file)
2736                             os.remove(encodeFilename(file))
2737                         return None
2738
2739                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2740                     return existing_files[0]
2741
2742                 success = True
2743                 if info_dict.get('requested_formats') is not None:
2744
2745                     def compatible_formats(formats):
2746                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2747                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2748                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2749                         if len(video_formats) > 2 or len(audio_formats) > 2:
2750                             return False
2751
2752                         # Check extension
2753                         exts = set(format.get('ext') for format in formats)
2754                         COMPATIBLE_EXTS = (
2755                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2756                             set(('webm',)),
2757                         )
2758                         for ext_sets in COMPATIBLE_EXTS:
2759                             if ext_sets.issuperset(exts):
2760                                 return True
2761                         # TODO: Check acodec/vcodec
2762                         return False
2763
2764                     requested_formats = info_dict['requested_formats']
2765                     old_ext = info_dict['ext']
2766                     if self.params.get('merge_output_format') is None:
2767                         if not compatible_formats(requested_formats):
2768                             info_dict['ext'] = 'mkv'
2769                             self.report_warning(
2770                                 'Requested formats are incompatible for merge and will be merged into mkv')
2771                         if (info_dict['ext'] == 'webm'
2772                                 and info_dict.get('thumbnails')
2773                                 # check with type instead of pp_key, __name__, or isinstance
2774                                 # since we dont want any custom PPs to trigger this
2775                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2776                             info_dict['ext'] = 'mkv'
2777                             self.report_warning(
2778                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2779                     new_ext = info_dict['ext']
2780
2781                     def correct_ext(filename, ext=new_ext):
2782                         if filename == '-':
2783                             return filename
2784                         filename_real_ext = os.path.splitext(filename)[1][1:]
2785                         filename_wo_ext = (
2786                             os.path.splitext(filename)[0]
2787                             if filename_real_ext in (old_ext, new_ext)
2788                             else filename)
2789                         return '%s.%s' % (filename_wo_ext, ext)
2790
2791                     # Ensure filename always has a correct extension for successful merge
2792                     full_filename = correct_ext(full_filename)
2793                     temp_filename = correct_ext(temp_filename)
2794                     dl_filename = existing_file(full_filename, temp_filename)
2795                     info_dict['__real_download'] = False
2796
2797                     if dl_filename is not None:
2798                         self.report_file_already_downloaded(dl_filename)
2799                     elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2800                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2801                         success, real_download = self.dl(temp_filename, info_dict)
2802                         info_dict['__real_download'] = real_download
2803                     else:
2804                         downloaded = []
2805                         merger = FFmpegMergerPP(self)
2806                         if self.params.get('allow_unplayable_formats'):
2807                             self.report_warning(
2808                                 'You have requested merging of multiple formats '
2809                                 'while also allowing unplayable formats to be downloaded. '
2810                                 'The formats won\'t be merged to prevent data corruption.')
2811                         elif not merger.available:
2812                             self.report_warning(
2813                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2814                                 'The formats won\'t be merged.')
2815
2816                         if temp_filename == '-':
2817                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2818                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2819                                       else 'but ffmpeg is not installed')
2820                             self.report_warning(
2821                                 f'You have requested downloading multiple formats to stdout {reason}. '
2822                                 'The formats will be streamed one after the other')
2823                             fname = temp_filename
2824                         for f in requested_formats:
2825                             new_info = dict(info_dict)
2826                             del new_info['requested_formats']
2827                             new_info.update(f)
2828                             if temp_filename != '-':
2829                                 fname = prepend_extension(
2830                                     correct_ext(temp_filename, new_info['ext']),
2831                                     'f%s' % f['format_id'], new_info['ext'])
2832                                 if not self._ensure_dir_exists(fname):
2833                                     return
2834                                 f['filepath'] = fname
2835                                 downloaded.append(fname)
2836                             partial_success, real_download = self.dl(fname, new_info)
2837                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2838                             success = success and partial_success
2839                         if merger.available and not self.params.get('allow_unplayable_formats'):
2840                             info_dict['__postprocessors'].append(merger)
2841                             info_dict['__files_to_merge'] = downloaded
2842                             # Even if there were no downloads, it is being merged only now
2843                             info_dict['__real_download'] = True
2844                         else:
2845                             for file in downloaded:
2846                                 files_to_move[file] = None
2847                 else:
2848                     # Just a single file
2849                     dl_filename = existing_file(full_filename, temp_filename)
2850                     if dl_filename is None or dl_filename == temp_filename:
2851                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2852                         # So we should try to resume the download
2853                         success, real_download = self.dl(temp_filename, info_dict)
2854                         info_dict['__real_download'] = real_download
2855                     else:
2856                         self.report_file_already_downloaded(dl_filename)
2857
2858                 dl_filename = dl_filename or temp_filename
2859                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2860
2861             except network_exceptions as err:
2862                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2863                 return
2864             except (OSError, IOError) as err:
2865                 raise UnavailableVideoError(err)
2866             except (ContentTooShortError, ) as err:
2867                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2868                 return
2869
2870             if success and full_filename != '-':
2871
2872                 def fixup():
2873                     do_fixup = True
2874                     fixup_policy = self.params.get('fixup')
2875                     vid = info_dict['id']
2876
2877                     if fixup_policy in ('ignore', 'never'):
2878                         return
2879                     elif fixup_policy == 'warn':
2880                         do_fixup = False
2881                     elif fixup_policy != 'force':
2882                         assert fixup_policy in ('detect_or_warn', None)
2883                         if not info_dict.get('__real_download'):
2884                             do_fixup = False
2885
2886                     def ffmpeg_fixup(cndn, msg, cls):
2887                         if not cndn:
2888                             return
2889                         if not do_fixup:
2890                             self.report_warning(f'{vid}: {msg}')
2891                             return
2892                         pp = cls(self)
2893                         if pp.available:
2894                             info_dict['__postprocessors'].append(pp)
2895                         else:
2896                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2897
2898                     stretched_ratio = info_dict.get('stretched_ratio')
2899                     ffmpeg_fixup(
2900                         stretched_ratio not in (1, None),
2901                         f'Non-uniform pixel ratio {stretched_ratio}',
2902                         FFmpegFixupStretchedPP)
2903
2904                     ffmpeg_fixup(
2905                         (info_dict.get('requested_formats') is None
2906                          and info_dict.get('container') == 'm4a_dash'
2907                          and info_dict.get('ext') == 'm4a'),
2908                         'writing DASH m4a. Only some players support this container',
2909                         FFmpegFixupM4aPP)
2910
2911                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
2912                     downloader = downloader.__name__ if downloader else None
2913                     ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2914                                  'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2915                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2916                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2917
2918                 fixup()
2919                 try:
2920                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2921                 except PostProcessingError as err:
2922                     self.report_error('Postprocessing: %s' % str(err))
2923                     return
2924                 try:
2925                     for ph in self._post_hooks:
2926                         ph(info_dict['filepath'])
2927                 except Exception as err:
2928                     self.report_error('post hooks: %s' % str(err))
2929                     return
2930                 must_record_download_archive = True
2931
2932         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2933             self.record_download_archive(info_dict)
2934         max_downloads = self.params.get('max_downloads')
2935         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2936             raise MaxDownloadsReached()
2937
2938     def download(self, url_list):
2939         """Download a given list of URLs."""
2940         outtmpl = self.outtmpl_dict['default']
2941         if (len(url_list) > 1
2942                 and outtmpl != '-'
2943                 and '%' not in outtmpl
2944                 and self.params.get('max_downloads') != 1):
2945             raise SameFileError(outtmpl)
2946
2947         for url in url_list:
2948             try:
2949                 # It also downloads the videos
2950                 res = self.extract_info(
2951                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2952             except UnavailableVideoError:
2953                 self.report_error('unable to download video')
2954             except DownloadCancelled as e:
2955                 self.to_screen(f'[info] {e.msg}')
2956                 raise
2957             else:
2958                 if self.params.get('dump_single_json', False):
2959                     self.post_extract(res)
2960                     self.to_stdout(json.dumps(self.sanitize_info(res)))
2961
2962         return self._download_retcode
2963
2964     def download_with_info_file(self, info_filename):
2965         with contextlib.closing(fileinput.FileInput(
2966                 [info_filename], mode='r',
2967                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2968             # FileInput doesn't have a read method, we can't call json.load
2969             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2970         try:
2971             self.process_ie_result(info, download=True)
2972         except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2973             webpage_url = info.get('webpage_url')
2974             if webpage_url is not None:
2975                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2976                 return self.download([webpage_url])
2977             else:
2978                 raise
2979         return self._download_retcode
2980
2981     @staticmethod
2982     def sanitize_info(info_dict, remove_private_keys=False):
2983         ''' Sanitize the infodict for converting to json '''
2984         if info_dict is None:
2985             return info_dict
2986         info_dict.setdefault('epoch', int(time.time()))
2987         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
2988         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
2989         if remove_private_keys:
2990             remove_keys |= {
2991                 'requested_formats', 'requested_subtitles', 'requested_entries',
2992                 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2993             }
2994             empty_values = (None, {}, [], set(), tuple())
2995             reject = lambda k, v: k not in keep_keys and (
2996                 k.startswith('_') or k in remove_keys or v in empty_values)
2997         else:
2998             reject = lambda k, v: k in remove_keys
2999         filter_fn = lambda obj: (
3000             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
3001             else obj if not isinstance(obj, dict)
3002             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
3003         return filter_fn(info_dict)
3004
3005     @staticmethod
3006     def filter_requested_info(info_dict, actually_filter=True):
3007         ''' Alias of sanitize_info for backward compatibility '''
3008         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3009
3010     def run_pp(self, pp, infodict):
3011         files_to_delete = []
3012         if '__files_to_move' not in infodict:
3013             infodict['__files_to_move'] = {}
3014         try:
3015             files_to_delete, infodict = pp.run(infodict)
3016         except PostProcessingError as e:
3017             # Must be True and not 'only_download'
3018             if self.params.get('ignoreerrors') is True:
3019                 self.report_error(e)
3020                 return infodict
3021             raise
3022
3023         if not files_to_delete:
3024             return infodict
3025         if self.params.get('keepvideo', False):
3026             for f in files_to_delete:
3027                 infodict['__files_to_move'].setdefault(f, '')
3028         else:
3029             for old_filename in set(files_to_delete):
3030                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3031                 try:
3032                     os.remove(encodeFilename(old_filename))
3033                 except (IOError, OSError):
3034                     self.report_warning('Unable to remove downloaded original file')
3035                 if old_filename in infodict['__files_to_move']:
3036                     del infodict['__files_to_move'][old_filename]
3037         return infodict
3038
3039     @staticmethod
3040     def post_extract(info_dict):
3041         def actual_post_extract(info_dict):
3042             if info_dict.get('_type') in ('playlist', 'multi_video'):
3043                 for video_dict in info_dict.get('entries', {}):
3044                     actual_post_extract(video_dict or {})
3045                 return
3046
3047             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3048             extra = post_extractor().items()
3049             info_dict.update(extra)
3050             info_dict.pop('__post_extractor', None)
3051
3052             original_infodict = info_dict.get('__original_infodict') or {}
3053             original_infodict.update(extra)
3054             original_infodict.pop('__post_extractor', None)
3055
3056         actual_post_extract(info_dict or {})
3057
3058     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3059         info = dict(ie_info)
3060         info['__files_to_move'] = files_to_move or {}
3061         for pp in self._pps[key]:
3062             info = self.run_pp(pp, info)
3063         return info, info.pop('__files_to_move', None)
3064
3065     def post_process(self, filename, ie_info, files_to_move=None):
3066         """Run all the postprocessors on the given file."""
3067         info = dict(ie_info)
3068         info['filepath'] = filename
3069         info['__files_to_move'] = files_to_move or {}
3070
3071         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3072             info = self.run_pp(pp, info)
3073         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3074         del info['__files_to_move']
3075         for pp in self._pps['after_move']:
3076             info = self.run_pp(pp, info)
3077         return info
3078
3079     def _make_archive_id(self, info_dict):
3080         video_id = info_dict.get('id')
3081         if not video_id:
3082             return
3083         # Future-proof against any change in case
3084         # and backwards compatibility with prior versions
3085         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3086         if extractor is None:
3087             url = str_or_none(info_dict.get('url'))
3088             if not url:
3089                 return
3090             # Try to find matching extractor for the URL and take its ie_key
3091             for ie_key, ie in self._ies.items():
3092                 if ie.suitable(url):
3093                     extractor = ie_key
3094                     break
3095             else:
3096                 return
3097         return '%s %s' % (extractor.lower(), video_id)
3098
3099     def in_download_archive(self, info_dict):
3100         fn = self.params.get('download_archive')
3101         if fn is None:
3102             return False
3103
3104         vid_id = self._make_archive_id(info_dict)
3105         if not vid_id:
3106             return False  # Incomplete video information
3107
3108         return vid_id in self.archive
3109
3110     def record_download_archive(self, info_dict):
3111         fn = self.params.get('download_archive')
3112         if fn is None:
3113             return
3114         vid_id = self._make_archive_id(info_dict)
3115         assert vid_id
3116         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3117             archive_file.write(vid_id + '\n')
3118         self.archive.add(vid_id)
3119
3120     @staticmethod
3121     def format_resolution(format, default='unknown'):
3122         is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
3123         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3124             return 'audio only'
3125         if format.get('resolution') is not None:
3126             return format['resolution']
3127         if format.get('width') and format.get('height'):
3128             res = '%dx%d' % (format['width'], format['height'])
3129         elif format.get('height'):
3130             res = '%sp' % format['height']
3131         elif format.get('width'):
3132             res = '%dx?' % format['width']
3133         elif is_images:
3134             return 'images'
3135         else:
3136             return default
3137         return f'{res} images' if is_images else res
3138
3139     def _format_note(self, fdict):
3140         res = ''
3141         if fdict.get('ext') in ['f4f', 'f4m']:
3142             res += '(unsupported) '
3143         if fdict.get('language'):
3144             if res:
3145                 res += ' '
3146             res += '[%s] ' % fdict['language']
3147         if fdict.get('format_note') is not None:
3148             res += fdict['format_note'] + ' '
3149         if fdict.get('tbr') is not None:
3150             res += '%4dk ' % fdict['tbr']
3151         if fdict.get('container') is not None:
3152             if res:
3153                 res += ', '
3154             res += '%s container' % fdict['container']
3155         if (fdict.get('vcodec') is not None
3156                 and fdict.get('vcodec') != 'none'):
3157             if res:
3158                 res += ', '
3159             res += fdict['vcodec']
3160             if fdict.get('vbr') is not None:
3161                 res += '@'
3162         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3163             res += 'video@'
3164         if fdict.get('vbr') is not None:
3165             res += '%4dk' % fdict['vbr']
3166         if fdict.get('fps') is not None:
3167             if res:
3168                 res += ', '
3169             res += '%sfps' % fdict['fps']
3170         if fdict.get('acodec') is not None:
3171             if res:
3172                 res += ', '
3173             if fdict['acodec'] == 'none':
3174                 res += 'video only'
3175             else:
3176                 res += '%-5s' % fdict['acodec']
3177         elif fdict.get('abr') is not None:
3178             if res:
3179                 res += ', '
3180             res += 'audio'
3181         if fdict.get('abr') is not None:
3182             res += '@%3dk' % fdict['abr']
3183         if fdict.get('asr') is not None:
3184             res += ' (%5dHz)' % fdict['asr']
3185         if fdict.get('filesize') is not None:
3186             if res:
3187                 res += ', '
3188             res += format_bytes(fdict['filesize'])
3189         elif fdict.get('filesize_approx') is not None:
3190             if res:
3191                 res += ', '
3192             res += '~' + format_bytes(fdict['filesize_approx'])
3193         return res
3194
3195     def _list_format_headers(self, *headers):
3196         if self.params.get('listformats_table', True) is not False:
3197             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3198         return headers
3199
3200     def list_formats(self, info_dict):
3201         formats = info_dict.get('formats', [info_dict])
3202         new_format = self.params.get('listformats_table', True) is not False
3203         if new_format:
3204             tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats))
3205             vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats))
3206             abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats))
3207             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3208             table = [
3209                 [
3210                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3211                     format_field(f, 'ext'),
3212                     self.format_resolution(f),
3213                     format_field(f, 'fps', '%3d'),
3214                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3215                     delim,
3216                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3217                     format_field(f, 'tbr', f'%{tbr_digits}dk'),
3218                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3219                     delim,
3220                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3221                     format_field(f, 'vbr', f'%{vbr_digits}dk'),
3222                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3223                     format_field(f, 'abr', f'%{abr_digits}dk'),
3224                     format_field(f, 'asr', '%5dHz'),
3225                     join_nonempty(
3226                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3227                         format_field(f, 'language', '[%s]'),
3228                         format_field(f, 'format_note'),
3229                         format_field(f, 'container', ignore=(None, f.get('ext'))),
3230                         delim=', '),
3231                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3232             header_line = self._list_format_headers(
3233                 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', '  TBR', 'PROTO',
3234                 delim, 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO')
3235         else:
3236             table = [
3237                 [
3238                     format_field(f, 'format_id'),
3239                     format_field(f, 'ext'),
3240                     self.format_resolution(f),
3241                     self._format_note(f)]
3242                 for f in formats
3243                 if f.get('preference') is None or f['preference'] >= -1000]
3244             header_line = ['format code', 'extension', 'resolution', 'note']
3245
3246         self.to_screen(
3247             '[info] Available formats for %s:' % info_dict['id'])
3248         self.to_stdout(render_table(
3249             header_line, table,
3250             extraGap=(0 if new_format else 1),
3251             hideEmpty=new_format,
3252             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3253
3254     def list_thumbnails(self, info_dict):
3255         thumbnails = list(info_dict.get('thumbnails'))
3256         if not thumbnails:
3257             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3258             return
3259
3260         self.to_screen(
3261             '[info] Thumbnails for %s:' % info_dict['id'])
3262         self.to_stdout(render_table(
3263             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3264             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3265
3266     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3267         if not subtitles:
3268             self.to_screen('%s has no %s' % (video_id, name))
3269             return
3270         self.to_screen(
3271             'Available %s for %s:' % (name, video_id))
3272
3273         def _row(lang, formats):
3274             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3275             if len(set(names)) == 1:
3276                 names = [] if names[0] == 'unknown' else names[:1]
3277             return [lang, ', '.join(names), ', '.join(exts)]
3278
3279         self.to_stdout(render_table(
3280             self._list_format_headers('Language', 'Name', 'Formats'),
3281             [_row(lang, formats) for lang, formats in subtitles.items()],
3282             hideEmpty=True))
3283
3284     def urlopen(self, req):
3285         """ Start an HTTP download """
3286         if isinstance(req, compat_basestring):
3287             req = sanitized_Request(req)
3288         return self._opener.open(req, timeout=self._socket_timeout)
3289
3290     def print_debug_header(self):
3291         if not self.params.get('verbose'):
3292             return
3293
3294         def get_encoding(stream):
3295             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3296             if not supports_terminal_sequences(stream):
3297                 ret += ' (No ANSI)'
3298             return ret
3299
3300         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3301             locale.getpreferredencoding(),
3302             sys.getfilesystemencoding(),
3303             get_encoding(self._screen_file), get_encoding(self._err_file),
3304             self.get_encoding())
3305
3306         logger = self.params.get('logger')
3307         if logger:
3308             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3309             write_debug(encoding_str)
3310         else:
3311             write_string(f'[debug] {encoding_str}\n', encoding=None)
3312             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3313
3314         source = detect_variant()
3315         write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
3316         if not _LAZY_LOADER:
3317             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3318                 write_debug('Lazy loading extractors is forcibly disabled')
3319             else:
3320                 write_debug('Lazy loading extractors is disabled')
3321         if plugin_extractors or plugin_postprocessors:
3322             write_debug('Plugins: %s' % [
3323                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3324                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3325         if self.params.get('compat_opts'):
3326             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3327         try:
3328             sp = Popen(
3329                 ['git', 'rev-parse', '--short', 'HEAD'],
3330                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3331                 cwd=os.path.dirname(os.path.abspath(__file__)))
3332             out, err = sp.communicate_or_kill()
3333             out = out.decode().strip()
3334             if re.match('[0-9a-f]+', out):
3335                 write_debug('Git HEAD: %s' % out)
3336         except Exception:
3337             try:
3338                 sys.exc_clear()
3339             except Exception:
3340                 pass
3341
3342         def python_implementation():
3343             impl_name = platform.python_implementation()
3344             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3345                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3346             return impl_name
3347
3348         write_debug('Python version %s (%s %s) - %s' % (
3349             platform.python_version(),
3350             python_implementation(),
3351             platform.architecture()[0],
3352             platform_name()))
3353
3354         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3355         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3356         if ffmpeg_features:
3357             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3358
3359         exe_versions['rtmpdump'] = rtmpdump_version()
3360         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3361         exe_str = ', '.join(
3362             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3363         ) or 'none'
3364         write_debug('exe versions: %s' % exe_str)
3365
3366         from .downloader.websocket import has_websockets
3367         from .postprocessor.embedthumbnail import has_mutagen
3368         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3369
3370         lib_str = ', '.join(sorted(filter(None, (
3371             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3372             has_websockets and 'websockets',
3373             has_mutagen and 'mutagen',
3374             SQLITE_AVAILABLE and 'sqlite',
3375             KEYRING_AVAILABLE and 'keyring',
3376         )))) or 'none'
3377         write_debug('Optional libraries: %s' % lib_str)
3378
3379         proxy_map = {}
3380         for handler in self._opener.handlers:
3381             if hasattr(handler, 'proxies'):
3382                 proxy_map.update(handler.proxies)
3383         write_debug(f'Proxy map: {proxy_map}')
3384
3385         # Not implemented
3386         if False and self.params.get('call_home'):
3387             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3388             write_debug('Public IP address: %s' % ipaddr)
3389             latest_version = self.urlopen(
3390                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3391             if version_tuple(latest_version) > version_tuple(__version__):
3392                 self.report_warning(
3393                     'You are using an outdated version (newest version: %s)! '
3394                     'See https://yt-dl.org/update if you need help updating.' %
3395                     latest_version)
3396
3397     def _setup_opener(self):
3398         timeout_val = self.params.get('socket_timeout')
3399         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3400
3401         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3402         opts_cookiefile = self.params.get('cookiefile')
3403         opts_proxy = self.params.get('proxy')
3404
3405         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3406
3407         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3408         if opts_proxy is not None:
3409             if opts_proxy == '':
3410                 proxies = {}
3411             else:
3412                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3413         else:
3414             proxies = compat_urllib_request.getproxies()
3415             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3416             if 'http' in proxies and 'https' not in proxies:
3417                 proxies['https'] = proxies['http']
3418         proxy_handler = PerRequestProxyHandler(proxies)
3419
3420         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3421         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3422         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3423         redirect_handler = YoutubeDLRedirectHandler()
3424         data_handler = compat_urllib_request_DataHandler()
3425
3426         # When passing our own FileHandler instance, build_opener won't add the
3427         # default FileHandler and allows us to disable the file protocol, which
3428         # can be used for malicious purposes (see
3429         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3430         file_handler = compat_urllib_request.FileHandler()
3431
3432         def file_open(*args, **kwargs):
3433             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3434         file_handler.file_open = file_open
3435
3436         opener = compat_urllib_request.build_opener(
3437             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3438
3439         # Delete the default user-agent header, which would otherwise apply in
3440         # cases where our custom HTTP handler doesn't come into play
3441         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3442         opener.addheaders = []
3443         self._opener = opener
3444
3445     def encode(self, s):
3446         if isinstance(s, bytes):
3447             return s  # Already encoded
3448
3449         try:
3450             return s.encode(self.get_encoding())
3451         except UnicodeEncodeError as err:
3452             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3453             raise
3454
3455     def get_encoding(self):
3456         encoding = self.params.get('encoding')
3457         if encoding is None:
3458             encoding = preferredencoding()
3459         return encoding
3460
3461     def _write_info_json(self, label, ie_result, infofn):
3462         ''' Write infojson and returns True = written, False = skip, None = error '''
3463         if not self.params.get('writeinfojson'):
3464             return False
3465         elif not infofn:
3466             self.write_debug(f'Skipping writing {label} infojson')
3467             return False
3468         elif not self._ensure_dir_exists(infofn):
3469             return None
3470         elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3471             self.to_screen(f'[info] {label.title()} metadata is already present')
3472         else:
3473             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3474             try:
3475                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3476             except (OSError, IOError):
3477                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3478                 return None
3479         return True
3480
3481     def _write_description(self, label, ie_result, descfn):
3482         ''' Write description and returns True = written, False = skip, None = error '''
3483         if not self.params.get('writedescription'):
3484             return False
3485         elif not descfn:
3486             self.write_debug(f'Skipping writing {label} description')
3487             return False
3488         elif not self._ensure_dir_exists(descfn):
3489             return None
3490         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3491             self.to_screen(f'[info] {label.title()} description is already present')
3492         elif ie_result.get('description') is None:
3493             self.report_warning(f'There\'s no {label} description to write')
3494             return False
3495         else:
3496             try:
3497                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3498                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3499                     descfile.write(ie_result['description'])
3500             except (OSError, IOError):
3501                 self.report_error(f'Cannot write {label} description file {descfn}')
3502                 return None
3503         return True
3504
3505     def _write_subtitles(self, info_dict, filename):
3506         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3507         ret = []
3508         subtitles = info_dict.get('requested_subtitles')
3509         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3510             # subtitles download errors are already managed as troubles in relevant IE
3511             # that way it will silently go on when used with unsupporting IE
3512             return ret
3513
3514         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3515         if not sub_filename_base:
3516             self.to_screen('[info] Skipping writing video subtitles')
3517             return ret
3518         for sub_lang, sub_info in subtitles.items():
3519             sub_format = sub_info['ext']
3520             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3521             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3522             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3523                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3524                 sub_info['filepath'] = sub_filename
3525                 ret.append((sub_filename, sub_filename_final))
3526                 continue
3527
3528             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3529             if sub_info.get('data') is not None:
3530                 try:
3531                     # Use newline='' to prevent conversion of newline characters
3532                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3533                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3534                         subfile.write(sub_info['data'])
3535                     sub_info['filepath'] = sub_filename
3536                     ret.append((sub_filename, sub_filename_final))
3537                     continue
3538                 except (OSError, IOError):
3539                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3540                     return None
3541
3542             try:
3543                 sub_copy = sub_info.copy()
3544                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3545                 self.dl(sub_filename, sub_copy, subtitle=True)
3546                 sub_info['filepath'] = sub_filename
3547                 ret.append((sub_filename, sub_filename_final))
3548             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3549                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3550                 continue
3551         return ret
3552
3553     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3554         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3555         write_all = self.params.get('write_all_thumbnails', False)
3556         thumbnails, ret = [], []
3557         if write_all or self.params.get('writethumbnail', False):
3558             thumbnails = info_dict.get('thumbnails') or []
3559         multiple = write_all and len(thumbnails) > 1
3560
3561         if thumb_filename_base is None:
3562             thumb_filename_base = filename
3563         if thumbnails and not thumb_filename_base:
3564             self.write_debug(f'Skipping writing {label} thumbnail')
3565             return ret
3566
3567         for t in thumbnails[::-1]:
3568             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3569             thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3570             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3571             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3572
3573             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3574                 ret.append((thumb_filename, thumb_filename_final))
3575                 t['filepath'] = thumb_filename
3576                 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
3577             else:
3578                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3579                 try:
3580                     uf = self.urlopen(t['url'])
3581                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3582                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3583                         shutil.copyfileobj(uf, thumbf)
3584                     ret.append((thumb_filename, thumb_filename_final))
3585                     t['filepath'] = thumb_filename
3586                 except network_exceptions as err:
3587                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3588             if ret and not write_all:
3589                 break
3590         return ret