yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import functools
  13 import io
  14 import itertools
  15 import json
  16 import locale
  17 import operator
  18 import os
  19 import platform
  20 import re
  21 import shutil
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26 import tokenize
  27 import traceback
  28 import random
  29 import unicodedata
  30
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DOT_DESKTOP_LINK_TEMPLATE,
  59     DOT_URL_LINK_TEMPLATE,
  60     DOT_WEBLOC_LINK_TEMPLATE,
  61     DownloadError,
  62     encode_compat_str,
  63     encodeFilename,
  64     EntryNotInPlaylist,
  65     error_to_compat_str,
  66     ExistingVideoReached,
  67     expand_path,
  68     ExtractorError,
  69     float_or_none,
  70     format_bytes,
  71     format_field,
  72     formatSeconds,
  73     GeoRestrictedError,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     LazyList,
  79     locked_file,
  80     make_dir,
  81     make_HTTPS_handler,
  82     MaxDownloadsReached,
  83     network_exceptions,
  84     orderedSet,
  85     OUTTMPL_TYPES,
  86     PagedList,
  87     parse_filesize,
  88     PerRequestProxyHandler,
  89     platform_name,
  90     PostProcessingError,
  91     preferredencoding,
  92     prepend_extension,
  93     process_communicate_or_kill,
  94     register_socks_protocols,
  95     RejectedVideoReached,
  96     render_table,
  97     replace_extension,
  98     SameFileError,
  99     sanitize_filename,
 100     sanitize_path,
 101     sanitize_url,
 102     sanitized_Request,
 103     std_headers,
 104     STR_FORMAT_RE_TMPL,
 105     STR_FORMAT_TYPES,
 106     str_or_none,
 107     strftime_or_none,
 108     subtitles_filename,
 109     supports_terminal_sequences,
 110     TERMINAL_SEQUENCES,
 111     ThrottledDownload,
 112     to_high_limit_path,
 113     traverse_obj,
 114     try_get,
 115     UnavailableVideoError,
 116     url_basename,
 117     variadic,
 118     version_tuple,
 119     write_json_file,
 120     write_string,
 121     YoutubeDLCookieProcessor,
 122     YoutubeDLHandler,
 123     YoutubeDLRedirectHandler,
 124 )
 125 from .cache import Cache
 126 from .extractor import (
 127     gen_extractor_classes,
 128     get_info_extractor,
 129     _LAZY_LOADER,
 130     _PLUGIN_CLASSES as plugin_extractors
 131 )
 132 from .extractor.openload import PhantomJSwrapper
 133 from .downloader import (
 134     FFmpegFD,
 135     get_suitable_downloader,
 136     shorten_protocol_name
 137 )
 138 from .downloader.rtmp import rtmpdump_version
 139 from .postprocessor import (
 140     get_postprocessor,
 141     EmbedThumbnailPP,
 142     FFmpegFixupDurationPP,
 143     FFmpegFixupM3u8PP,
 144     FFmpegFixupM4aPP,
 145     FFmpegFixupStretchedPP,
 146     FFmpegFixupTimestampPP,
 147     FFmpegMergerPP,
 148     FFmpegPostProcessor,
 149     MoveFilesAfterDownloadPP,
 150     _PLUGIN_CLASSES as plugin_postprocessors
 151 )
 152 from .update import detect_variant
 153 from .version import __version__
 154
 155 if compat_os_name == 'nt':
 156     import ctypes
 157
 158
 159 class YoutubeDL(object):
 160     """YoutubeDL class.
 161
  162     YoutubeDL objects are the ones responsible for downloading the
  163     actual video file and writing it to disk if the user has requested
  164     it, among some other tasks. In most cases there should be one per
  165     program. As, given a video URL, the downloader doesn't know how to
  166     extract all the needed information, a task that InfoExtractors do, it
  167     has to pass the URL to one of them.
 168
  169     For this, YoutubeDL objects have a method that allows
  170     InfoExtractors to be registered in a given order. When it is passed
  171     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  172     finds that reports being able to handle it. The InfoExtractor extracts
  173     all the information about the video or videos the URL refers to, and
  174     YoutubeDL processes the extracted information, possibly using a File
  175     Downloader to download the video.
 176
 177     YoutubeDL objects accept a lot of parameters. In order not to saturate
 178     the object constructor with arguments, it receives a dictionary of
 179     options instead. These options are available through the params
 180     attribute for the InfoExtractors to use. The YoutubeDL also
 181     registers itself as the downloader in charge for the InfoExtractors
 182     that are added to it, so this is a "mutual registration".
 183
 184     Available options:
 185
 186     username:          Username for authentication purposes.
 187     password:          Password for authentication purposes.
 188     videopassword:     Password for accessing a video.
 189     ap_mso:            Adobe Pass multiple-system operator identifier.
 190     ap_username:       Multiple-system operator account username.
 191     ap_password:       Multiple-system operator account password.
 192     usenetrc:          Use netrc for authentication instead.
 193     verbose:           Print additional info to stdout.
 194     quiet:             Do not print messages to stdout.
 195     no_warnings:       Do not print out anything for warnings.
 196     forceprint:        A list of templates to force print
 197     forceurl:          Force printing final URL. (Deprecated)
 198     forcetitle:        Force printing title. (Deprecated)
 199     forceid:           Force printing ID. (Deprecated)
 200     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 201     forcedescription:  Force printing description. (Deprecated)
 202     forcefilename:     Force printing final filename. (Deprecated)
 203     forceduration:     Force printing duration. (Deprecated)
 204     forcejson:         Force printing info_dict as JSON.
 205     dump_single_json:  Force printing the info_dict of the whole playlist
 206                        (or video) as a single JSON line.
 207     force_write_download_archive: Force writing download archive regardless
 208                        of 'skip_download' or 'simulate'.
 209     simulate:          Do not download the video files. If unset (or None),
 210                        simulate only if listsubtitles, listformats or list_thumbnails is used
 211     format:            Video format code. see "FORMAT SELECTION" for more details.
 212     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
  213     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 214                        extracting metadata even if the video is not actually
 215                        available for download (experimental)
 216     format_sort:       How to sort the video formats. see "Sorting Formats"
 217                        for more details.
 218     format_sort_force: Force the given format_sort. see "Sorting Formats"
 219                        for more details.
 220     allow_multiple_video_streams:   Allow multiple video streams to be merged
 221                        into a single file
 222     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 223                        into a single file
  224     check_formats:     Whether to test if the formats are downloadable.
 225                        Can be True (check all), False (check none)
 226                        or None (check only if requested by extractor)
  227     paths:             Dictionary of output paths. The allowed keys are 'home',
 228                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 229     outtmpl:           Dictionary of templates for output names. Allowed keys
 230                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 231                        For compatibility with youtube-dl, a single string can also be used
 232     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 233     restrictfilenames: Do not allow "&" and spaces in file names
 234     trim_file_name:    Limit length of filename (extension excluded)
 235     windowsfilenames:  Force the filenames to be windows compatible
 236     ignoreerrors:      Do not stop on download/postprocessing errors.
 237                        Can be 'only_download' to ignore only download errors.
 238                        Default is 'only_download' for CLI, but False for API
 239     skip_playlist_after_errors: Number of allowed failures until the rest of
 240                        the playlist is skipped
 241     force_generic_extractor: Force downloader to use the generic extractor
 242     overwrites:        Overwrite all video and metadata files if True,
 243                        overwrite only non-video files if None
 244                        and don't overwrite any file if False
 245                        For compatibility with youtube-dl,
 246                        "nooverwrites" may also be used instead
 247     playliststart:     Playlist item to start at.
 248     playlistend:       Playlist item to end at.
 249     playlist_items:    Specific indices of playlist to download.
 250     playlistreverse:   Download playlist items in reverse order.
 251     playlistrandom:    Download playlist items in random order.
 252     matchtitle:        Download only matching titles.
 253     rejecttitle:       Reject downloads for matching titles.
 254     logger:            Log messages to a logging.Logger instance.
 255     logtostderr:       Log messages to stderr instead of stdout.
  256     consoletitle:      Display progress in console window's titlebar.
 257     writedescription:  Write the video description to a .description file
  258     writeinfojson:     Write the video metadata to a .info.json file
 259     clean_infojson:    Remove private fields from the infojson
 260     getcomments:       Extract video comments. This will not be written to disk
 261                        unless writeinfojson is also given
 262     writeannotations:  Write the video annotations to a .annotations.xml file
 263     writethumbnail:    Write the thumbnail image to a file
 264     allow_playlist_files: Whether to write playlists' description, infojson etc
 265                        also to disk when using the 'write*' options
 266     write_all_thumbnails:  Write all thumbnail formats to files
 267     writelink:         Write an internet shortcut file, depending on the
 268                        current platform (.url/.webloc/.desktop)
 269     writeurllink:      Write a Windows internet shortcut file (.url)
 270     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 271     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 272     writesubtitles:    Write the video subtitles to a file
 273     writeautomaticsub: Write the automatically generated subtitles to a file
 274     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 275                        Downloads all the subtitles of the video
 276                        (requires writesubtitles or writeautomaticsub)
 277     listsubtitles:     Lists all available subtitles for the video
 278     subtitlesformat:   The format code for subtitles
 279     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 280                        The list may contain "all" to refer to all the available
 281                        subtitles. The language can be prefixed with a "-" to
 282                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 283     keepvideo:         Keep the video file after post-processing
 284     daterange:         A DateRange object, download only if the upload_date is in the range.
 285     skip_download:     Skip the actual download of the video file
 286     cachedir:          Location of the cache files in the filesystem.
 287                        False to disable filesystem cache.
 288     noplaylist:        Download single video instead of a playlist if in doubt.
 289     age_limit:         An integer representing the user's age in years.
 290                        Unsuitable videos for the given age are skipped.
 291     min_views:         An integer representing the minimum view count the video
 292                        must have in order to not be skipped.
 293                        Videos without view count information are always
 294                        downloaded. None for no limit.
 295     max_views:         An integer representing the maximum view count.
 296                        Videos that are more popular than that are not
 297                        downloaded.
 298                        Videos without view count information are always
 299                        downloaded. None for no limit.
 300     download_archive:  File name of a file where all downloads are recorded.
 301                        Videos already present in the file are not downloaded
 302                        again.
 303     break_on_existing: Stop the download process after attempting to download a
 304                        file that is in the archive.
 305     break_on_reject:   Stop the download process when encountering a video that
 306                        has been filtered out.
 307     cookiefile:        File name where cookies should be read from and dumped to
 308     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 309                        name/path from where cookies are loaded.
  310                        Eg: ('chrome', ) or ('vivaldi', 'default')
 311     nocheckcertificate:Do not verify SSL certificates
 312     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 313                        At the moment, this is only supported by YouTube.
 314     proxy:             URL of the proxy server to use
 315     geo_verification_proxy:  URL of the proxy to use for IP address verification
 316                        on geo-restricted sites.
 317     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 318     bidi_workaround:   Work around buggy terminals without bidirectional text
  319                        support, using bidiv or fribidi
 320     debug_printtraffic:Print out sent and received HTTP traffic
 321     include_ads:       Download ads as well
 322     default_search:    Prepend this string if an input url is not valid.
 323                        'auto' for elaborate guessing
 324     encoding:          Use this encoding instead of the system-specified.
 325     extract_flat:      Do not resolve URLs, return the immediate result.
 326                        Pass in 'in_playlist' to only show this behavior for
 327                        playlist items.
 328     postprocessors:    A list of dictionaries, each with an entry
 329                        * key:  The name of the postprocessor. See
 330                                yt_dlp/postprocessor/__init__.py for a list.
 331                        * when: When to run the postprocessor. Can be one of
 332                                pre_process|before_dl|post_process|after_move.
 333                                Assumed to be 'post_process' if not given
 334     post_hooks:        Deprecated - Register a custom postprocessor instead
 335                        A list of functions that get called as the final step
 336                        for each video file, after all postprocessors have been
 337                        called. The filename will be passed as the only argument.
 338     progress_hooks:    A list of functions that get called on download
 339                        progress, with a dictionary with the entries
 340                        * status: One of "downloading", "error", or "finished".
 341                                  Check this first and ignore unknown values.
 342                        * info_dict: The extracted info_dict
 343
 344                        If status is one of "downloading", or "finished", the
 345                        following properties may also be present:
 346                        * filename: The final filename (always present)
 347                        * tmpfilename: The filename we're currently writing to
 348                        * downloaded_bytes: Bytes on disk
 349                        * total_bytes: Size of the whole file, None if unknown
 350                        * total_bytes_estimate: Guess of the eventual file size,
 351                                                None if unavailable.
 352                        * elapsed: The number of seconds since download started.
 353                        * eta: The estimated time in seconds, None if unknown
 354                        * speed: The download speed in bytes/second, None if
 355                                 unknown
 356                        * fragment_index: The counter of the currently
 357                                          downloaded video fragment.
 358                        * fragment_count: The number of fragments (= individual
 359                                          files that will be merged)
 360
 361                        Progress hooks are guaranteed to be called at least once
 362                        (with status "finished") if the download is successful.
 363     postprocessor_hooks:  A list of functions that get called on postprocessing
 364                        progress, with a dictionary with the entries
 365                        * status: One of "started", "processing", or "finished".
 366                                  Check this first and ignore unknown values.
 367                        * postprocessor: Name of the postprocessor
 368                        * info_dict: The extracted info_dict
 369
  370                        Postprocessor hooks are guaranteed to be called at least twice
 371                        (with status "started" and "finished") if the processing is successful.
 372     merge_output_format: Extension to use when merging formats.
 373     final_ext:         Expected final extension; used to detect when the file was
 374                        already downloaded and converted. "merge_output_format" is
 375                        replaced by this extension when given
 376     fixup:             Automatically correct known faults of the file.
 377                        One of:
 378                        - "never": do nothing
 379                        - "warn": only emit a warning
 380                        - "detect_or_warn": check whether we can do anything
 381                                            about it, warn otherwise (default)
 382     source_address:    Client-side IP address to bind to.
 383     call_home:         Boolean, true iff we are allowed to contact the
 384                        yt-dlp servers for debugging. (BROKEN)
 385     sleep_interval_requests: Number of seconds to sleep between requests
 386                        during extraction
 387     sleep_interval:    Number of seconds to sleep before each download when
 388                        used alone or a lower bound of a range for randomized
 389                        sleep before each download (minimum possible number
 390                        of seconds to sleep) when used along with
 391                        max_sleep_interval.
 392     max_sleep_interval:Upper bound of a range for randomized sleep before each
 393                        download (maximum possible number of seconds to sleep).
 394                        Must only be used along with sleep_interval.
 395                        Actual sleep time will be a random float from range
 396                        [sleep_interval; max_sleep_interval].
 397     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 398     listformats:       Print an overview of available video formats and exit.
 399     list_thumbnails:   Print a table of all thumbnails and exit.
 400     match_filter:      A function that gets called with the info_dict of
 401                        every video.
 402                        If it returns a message, the video is ignored.
 403                        If it returns None, the video is downloaded.
 404                        match_filter_func in utils.py is one example for this.
 405     no_color:          Do not emit color codes in output.
 406     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 407                        HTTP header
 408     geo_bypass_country:
  409                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 410                        explicit geographic restriction bypassing via faking
 411                        X-Forwarded-For HTTP header
 412     geo_bypass_ip_block:
 413                        IP range in CIDR notation that will be used similarly to
 414                        geo_bypass_country
 415
 416     The following options determine which downloader is picked:
 417     external_downloader: A dictionary of protocol keys and the executable of the
 418                        external downloader to use for it. The allowed protocols
 419                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 420                        Set the value to 'native' to use the native downloader
 421     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 422                        or {'m3u8': 'ffmpeg'} instead.
 423                        Use the native HLS downloader instead of ffmpeg/avconv
 424                        if True, otherwise use ffmpeg/avconv if False, otherwise
 425                        use downloader suggested by extractor if None.
 426     compat_opts:       Compatibility options. See "Differences in default behavior".
 427                        The following options do not work when used through the API:
  428                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
  429                        no-clean-infojson, no-playlist-metafiles, no-keep-subs.
  430                        Refer to __init__.py for their implementation
 431     progress_template: Dictionary of templates for progress outputs.
 432                        Allowed keys are 'download', 'postprocess',
 433                        'download-title' (console title) and 'postprocess-title'.
 434                        The template is mapped on a dictionary with keys 'progress' and 'info'
 435
 436     The following parameters are not used by YoutubeDL itself, they are used by
 437     the downloader (see yt_dlp/downloader/common.py):
 438     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 439     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 440     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 441     external_downloader_args.
 442
 443     The following options are used by the post processors:
 444     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 445                        otherwise prefer ffmpeg. (avconv support is deprecated)
 446     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 447                        to the binary or its containing directory.
 448     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 449                        and a list of additional command-line arguments for the
 450                        postprocessor/executable. The dict can also have "PP+EXE" keys
 451                        which are used when the given exe is used by the given PP.
  452                        Use 'default' as the name for arguments to be passed to all PP
 453                        For compatibility with youtube-dl, a single list of args
 454                        can also be used
 455
 456     The following options are used by the extractors:
 457     extractor_retries: Number of times to retry for known errors
 458     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 459     hls_split_discontinuity: Split HLS playlists to different formats at
 460                        discontinuities such as ad breaks (default: False)
 461     extractor_args:    A dictionary of arguments to be passed to the extractors.
 462                        See "EXTRACTOR ARGUMENTS" for details.
 463                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 464     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 465                        If True (default), DASH manifests and related
 466                        data will be downloaded and processed by extractor.
 467                        You can reduce network I/O by disabling it if you don't
 468                        care about DASH. (only for youtube)
 469     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 470                        If True (default), HLS manifests and related
 471                        data will be downloaded and processed by extractor.
 472                        You can reduce network I/O by disabling it if you don't
 473                        care about HLS. (only for youtube)
 474     """
 475
  476     _NUMERIC_FIELDS = set((  # NOTE(review): presumably info_dict field names holding numeric values; consumers are outside this view
  477         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
  478         'timestamp', 'release_timestamp',
  479         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
  480         'average_rating', 'comment_count', 'age_limit',
  481         'start_time', 'end_time',
  482         'chapter_number', 'season_number', 'episode_number',
  483         'track_number', 'disc_number', 'release_year',
  484     ))
  485
  486     params = None  # class-level default; __init__ binds the options dict per instance
  487     _ies = {}  # class-level default; __init__ rebinds a fresh dict per instance
  488     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}  # one list per postprocessor 'when' stage; rebound per instance in __init__
  489     _printed_messages = set()  # class-level default; __init__ rebinds a fresh set per instance
  490     _first_webpage_request = True  # also set to True per instance in __init__
  491     _download_retcode = None  # __init__ initialises this to 0
  492     _num_downloads = None  # __init__ initialises this to 0
  493     _playlist_level = 0  # NOTE(review): not reassigned in the visible part of __init__
  494     _playlist_urls = set()  # NOTE(review): mutable class-level default, not rebound in the visible part of __init__; may be shared across instances -- confirm
  495     _screen_file = None  # __init__ sets this to sys.stdout, or sys.stderr when params['logtostderr'] is truthy
 496
 497     def __init__(self, params=None, auto_init=True):
  498         """Create a YoutubeDL object with the given options."""
 499         if params is None:
 500             params = {}
 501         self._ies = {}
 502         self._ies_instances = {}
 503         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 504         self._printed_messages = set()
 505         self._first_webpage_request = True
 506         self._post_hooks = []
 507         self._progress_hooks = []
 508         self._postprocessor_hooks = []
 509         self._download_retcode = 0
 510         self._num_downloads = 0
 511         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 512         self._err_file = sys.stderr
 513         self.params = params
 514         self.cache = Cache(self)
 515
 516         windows_enable_vt_mode()
 517         self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)
 518
 519         if sys.version_info < (3, 6):
 520             self.report_warning(
 521                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 522
 523         if self.params.get('allow_unplayable_formats'):
 524             self.report_warning(
 525                 f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
 526                 'This is a developer option intended for debugging. \n'
 527                 '         If you experience any issues while using this option, '
 528                 f'{self._color_text("DO NOT", "red")} open a bug report')
 529
 530         def check_deprecated(param, option, suggestion):
 531             if self.params.get(param) is not None:
 532                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 533                 return True
 534             return False
 535
 536         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 537             if self.params.get('geo_verification_proxy') is None:
 538                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 539
 540         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 541         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 542         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 543
 544         for msg in self.params.get('warnings', []):
 545             self.report_warning(msg)
 546
 547         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 548             # nooverwrites was unnecessarily changed to overwrites
 549             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 550             # This ensures compatibility with both keys
 551             self.params['overwrites'] = not self.params['nooverwrites']
 552         elif self.params.get('overwrites') is None:
 553             self.params.pop('overwrites', None)
 554         else:
 555             self.params['nooverwrites'] = not self.params['overwrites']
 556
 557         if params.get('bidi_workaround', False):
 558             try:
 559                 import pty
 560                 master, slave = pty.openpty()
 561                 width = compat_get_terminal_size().columns
 562                 if width is None:
 563                     width_args = []
 564                 else:
 565                     width_args = ['-w', str(width)]
 566                 sp_kwargs = dict(
 567                     stdin=subprocess.PIPE,
 568                     stdout=slave,
 569                     stderr=self._err_file)
 570                 try:
 571                     self._output_process = subprocess.Popen(
 572                         ['bidiv'] + width_args, **sp_kwargs
 573                     )
 574                 except OSError:
 575                     self._output_process = subprocess.Popen(
 576                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 577                 self._output_channel = os.fdopen(master, 'rb')
 578             except OSError as ose:
 579                 if ose.errno == errno.ENOENT:
 580                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 581                 else:
 582                     raise
 583
 584         if (sys.platform != 'win32'
 585                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 586                 and not params.get('restrictfilenames', False)):
 587             # Unicode filesystem API will throw errors (#1474, #13027)
 588             self.report_warning(
 589                 'Assuming --restrict-filenames since file system encoding '
 590                 'cannot encode all characters. '
 591                 'Set the LC_ALL environment variable to fix this.')
 592             self.params['restrictfilenames'] = True
 593
 594         self.outtmpl_dict = self.parse_outtmpl()
 595
 596         # Creating format selector here allows us to catch syntax errors before the extraction
 597         self.format_selector = (
 598             None if self.params.get('format') is None
 599             else self.build_format_selector(self.params['format']))
 600
 601         self._setup_opener()
 602
 603         def preload_download_archive(fn):
 604             """Preload the archive, if any is specified"""
 605             if fn is None:
 606                 return False
 607             self.write_debug('Loading archive file %r\n' % fn)
 608             try:
 609                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 610                     for line in archive_file:
 611                         self.archive.add(line.strip())
 612             except IOError as ioe:
 613                 if ioe.errno != errno.ENOENT:
 614                     raise
 615                 return False
 616             return True
 617
 618         self.archive = set()
 619         preload_download_archive(self.params.get('download_archive'))
 620
 621         if auto_init:
 622             self.print_debug_header()
 623             self.add_default_info_extractors()
 624
 625         for pp_def_raw in self.params.get('postprocessors', []):
 626             pp_def = dict(pp_def_raw)
 627             when = pp_def.pop('when', 'post_process')
 628             pp_class = get_postprocessor(pp_def.pop('key'))
 629             pp = pp_class(self, **compat_kwargs(pp_def))
 630             self.add_post_processor(pp, when=when)
 631
 632         for ph in self.params.get('post_hooks', []):
 633             self.add_post_hook(ph)
 634
 635         for ph in self.params.get('progress_hooks', []):
 636             self.add_progress_hook(ph)
 637
 638         register_socks_protocols()
 639
 640     def warn_if_short_id(self, argv):
 641         # short YouTube ID starting with dash?
 642         idxs = [
 643             i for i, a in enumerate(argv)
 644             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 645         if idxs:
 646             correct_argv = (
 647                 ['yt-dlp']
 648                 + [a for i, a in enumerate(argv) if i not in idxs]
 649                 + ['--'] + [argv[i] for i in idxs]
 650             )
 651             self.report_warning(
 652                 'Long argument string detected. '
 653                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 654                 args_to_str(correct_argv))
 655
 656     def add_info_extractor(self, ie):
 657         """Add an InfoExtractor object to the end of the list."""
 658         ie_key = ie.ie_key()
 659         self._ies[ie_key] = ie
 660         if not isinstance(ie, type):
 661             self._ies_instances[ie_key] = ie
 662             ie.set_downloader(self)
 663
 664     def _get_info_extractor_class(self, ie_key):
 665         ie = self._ies.get(ie_key)
 666         if ie is None:
 667             ie = get_info_extractor(ie_key)
 668             self.add_info_extractor(ie)
 669         return ie
 670
 671     def get_info_extractor(self, ie_key):
 672         """
 673         Get an instance of an IE with name ie_key, it will try to get one from
 674         the _ies list, if there's no instance it will create a new one and add
 675         it to the extractor list.
 676         """
 677         ie = self._ies_instances.get(ie_key)
 678         if ie is None:
 679             ie = get_info_extractor(ie_key)()
 680             self.add_info_extractor(ie)
 681         return ie
 682
 683     def add_default_info_extractors(self):
 684         """
 685         Add the InfoExtractors returned by gen_extractors to the end of the list
 686         """
 687         for ie in gen_extractor_classes():
 688             self.add_info_extractor(ie)
 689
 690     def add_post_processor(self, pp, when='post_process'):
 691         """Add a PostProcessor object to the end of the chain."""
 692         self._pps[when].append(pp)
 693         pp.set_downloader(self)
 694
 695     def add_post_hook(self, ph):
 696         """Add the post hook"""
 697         self._post_hooks.append(ph)
 698
 699     def add_progress_hook(self, ph):
 700         """Add the download progress hook"""
 701         self._progress_hooks.append(ph)
 702
 703     def add_postprocessor_hook(self, ph):
 704         """Add the postprocessing progress hook"""
 705         self._postprocessor_hooks.append(ph)
 706
 707     def _bidi_workaround(self, message):
 708         if not hasattr(self, '_output_channel'):
 709             return message
 710
 711         assert hasattr(self, '_output_process')
 712         assert isinstance(message, compat_str)
 713         line_count = message.count('\n') + 1
 714         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 715         self._output_process.stdin.flush()
 716         res = ''.join(self._output_channel.readline().decode('utf-8')
 717                       for _ in range(line_count))
 718         return res[:-len('\n')]
 719
 720     def _write_string(self, message, out=None, only_once=False):
 721         if only_once:
 722             if message in self._printed_messages:
 723                 return
 724             self._printed_messages.add(message)
 725         write_string(message, out=out, encoding=self.params.get('encoding'))
 726
 727     def to_stdout(self, message, skip_eol=False, quiet=False):
 728         """Print message to stdout"""
 729         if self.params.get('logger'):
 730             self.params['logger'].debug(message)
 731         elif not quiet or self.params.get('verbose'):
 732             self._write_string(
 733                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 734                 self._err_file if quiet else self._screen_file)
 735
 736     def to_stderr(self, message, only_once=False):
 737         """Print message to stderr"""
 738         assert isinstance(message, compat_str)
 739         if self.params.get('logger'):
 740             self.params['logger'].error(message)
 741         else:
 742             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 743
 744     def to_console_title(self, message):
 745         if not self.params.get('consoletitle', False):
 746             return
 747         if compat_os_name == 'nt':
 748             if ctypes.windll.kernel32.GetConsoleWindow():
 749                 # c_wchar_p() might not be necessary if `message` is
 750                 # already of type unicode()
 751                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 752         elif 'TERM' in os.environ:
 753             self._write_string('\033]0;%s\007' % message, self._screen_file)
 754
 755     def save_console_title(self):
 756         if not self.params.get('consoletitle', False):
 757             return
 758         if self.params.get('simulate'):
 759             return
 760         if compat_os_name != 'nt' and 'TERM' in os.environ:
 761             # Save the title on stack
 762             self._write_string('\033[22;0t', self._screen_file)
 763
 764     def restore_console_title(self):
 765         if not self.params.get('consoletitle', False):
 766             return
 767         if self.params.get('simulate'):
 768             return
 769         if compat_os_name != 'nt' and 'TERM' in os.environ:
 770             # Restore the title from stack
 771             self._write_string('\033[23;0t', self._screen_file)
 772
 773     def __enter__(self):
 774         self.save_console_title()
 775         return self
 776
 777     def __exit__(self, *args):
 778         self.restore_console_title()
 779
 780         if self.params.get('cookiefile') is not None:
 781             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 782
 783     def trouble(self, message=None, tb=None):
 784         """Determine action to take when a download problem appears.
 785
 786         Depending on if the downloader has been configured to ignore
 787         download errors or not, this method may throw an exception or
 788         not when errors are found, after printing the message.
 789
 790         tb, if given, is additional traceback information.
 791         """
 792         if message is not None:
 793             self.to_stderr(message)
 794         if self.params.get('verbose'):
 795             if tb is None:
 796                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 797                     tb = ''
 798                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 799                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 800                     tb += encode_compat_str(traceback.format_exc())
 801                 else:
 802                     tb_data = traceback.format_list(traceback.extract_stack())
 803                     tb = ''.join(tb_data)
 804             if tb:
 805                 self.to_stderr(tb)
 806         if not self.params.get('ignoreerrors'):
 807             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 808                 exc_info = sys.exc_info()[1].exc_info
 809             else:
 810                 exc_info = sys.exc_info()
 811             raise DownloadError(message, exc_info)
 812         self._download_retcode = 1
 813
 814     def to_screen(self, message, skip_eol=False):
 815         """Print message to stdout if not in quiet mode"""
 816         self.to_stdout(
 817             message, skip_eol, quiet=self.params.get('quiet', False))
 818
 819     def _color_text(self, text, color):
 820         if self.params.get('no_color'):
 821             return text
 822         return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'
 823
 824     def report_warning(self, message, only_once=False):
 825         '''
 826         Print the message to stderr, it will be prefixed with 'WARNING:'
 827         If stderr is a tty file the 'WARNING:' will be colored
 828         '''
 829         if self.params.get('logger') is not None:
 830             self.params['logger'].warning(message)
 831         else:
 832             if self.params.get('no_warnings'):
 833                 return
 834             self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once)
 835
 836     def report_error(self, message, tb=None):
 837         '''
 838         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 839         in red if stderr is a tty file.
 840         '''
 841         self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb)
 842
 843     def write_debug(self, message, only_once=False):
 844         '''Log debug message or Print message to stderr'''
 845         if not self.params.get('verbose', False):
 846             return
 847         message = '[debug] %s' % message
 848         if self.params.get('logger'):
 849             self.params['logger'].debug(message)
 850         else:
 851             self.to_stderr(message, only_once)
 852
 853     def report_file_already_downloaded(self, file_name):
 854         """Report file has already been fully downloaded."""
 855         try:
 856             self.to_screen('[download] %s has already been downloaded' % file_name)
 857         except UnicodeEncodeError:
 858             self.to_screen('[download] The file has already been downloaded')
 859
 860     def report_file_delete(self, file_name):
 861         """Report that existing file will be deleted."""
 862         try:
 863             self.to_screen('Deleting existing file %s' % file_name)
 864         except UnicodeEncodeError:
 865             self.to_screen('Deleting existing file')
 866
 867     def raise_no_formats(self, info, forced=False):
 868         has_drm = info.get('__has_drm')
 869         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 870         expected = self.params.get('ignore_no_formats_error')
 871         if forced or not expected:
 872             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 873                                  expected=has_drm or expected)
 874         else:
 875             self.report_warning(msg)
 876
 877     def parse_outtmpl(self):
 878         outtmpl_dict = self.params.get('outtmpl', {})
 879         if not isinstance(outtmpl_dict, dict):
 880             outtmpl_dict = {'default': outtmpl_dict}
 881         outtmpl_dict.update({
 882             k: v for k, v in DEFAULT_OUTTMPL.items()
 883             if outtmpl_dict.get(k) is None})
 884         for key, val in outtmpl_dict.items():
 885             if isinstance(val, bytes):
 886                 self.report_warning(
 887                     'Parameter outtmpl is bytes, but should be a unicode string. '
 888                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 889         return outtmpl_dict
 890
 891     def get_output_path(self, dir_type='', filename=None):
 892         paths = self.params.get('paths', {})
 893         assert isinstance(paths, dict)
 894         path = os.path.join(
 895             expand_path(paths.get('home', '').strip()),
 896             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 897             filename or '')
 898
 899         # Temporary fix for #4787
 900         # 'Treat' all problem characters by passing filename through preferredencoding
 901         # to workaround encoding issues with subprocess on python2 @ Windows
 902         if sys.version_info < (3, 0) and sys.platform == 'win32':
 903             path = encodeFilename(path, True).decode(preferredencoding())
 904         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 905
 906     @staticmethod
 907     def _outtmpl_expandpath(outtmpl):
 908         # expand_path translates '%%' into '%' and '$$' into '$'
 909         # correspondingly that is not what we want since we need to keep
 910         # '%%' intact for template dict substitution step. Working around
 911         # with boundary-alike separator hack.
 912         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 913         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 914
 915         # outtmpl should be expand_path'ed before template dict substitution
 916         # because meta fields may contain env variables we don't want to
 917         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 918         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 919         return expand_path(outtmpl).replace(sep, '')
 920
 921     @staticmethod
 922     def escape_outtmpl(outtmpl):
 923         ''' Escape any remaining strings like %s, %abc% etc. '''
 924         return re.sub(
 925             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 926             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 927             outtmpl)
 928
 929     @classmethod
 930     def validate_outtmpl(cls, outtmpl):
 931         ''' @return None or Exception object '''
 932         outtmpl = re.sub(
 933             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
 934             lambda mobj: f'{mobj.group(0)[:-1]}s',
 935             cls._outtmpl_expandpath(outtmpl))
 936         try:
 937             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
 938             return None
 939         except ValueError as err:
 940             return err
 941
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """
        Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        Every %(key)X field of the template is evaluated against info_dict
        (object traversal, negation, +/- maths, strftime formatting,
        alternates and defaults) and rewritten so that it refers to an entry
        of a freshly built dict holding the computed value.

        outtmpl   -- the output template
        info_dict -- the info dict; 'epoch' is set on the passed dict if
                     missing, everything else is done on a shallow copy
        sanitize  -- optional callable(field_name, value) applied to values
                     that end up with a 'c', 's' or 'r' conversion

        Returns a tuple (rewritten template, dict of values).
        """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
        # Internal bookkeeping keys are not exposed to the template
        for key in ('__original_infodict', '__postprocessors'):
            info_dict.pop(key, None)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Values computed for the rewritten template keys (see create_key)
        TMPL_DICT = {}
        # Matches template fields, including the custom conversions l, j, q, B and U
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the number pattern below is unescaped, so it matches
        # any character, not only a decimal point - confirm whether r'\.' was intended.
        # NOTE(review): the alternation is not wrapped in a group, so it binds looser
        # than the operator it is concatenated with in INTERNAL_FORMAT_RE - confirm.
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Grammar of the text inside %(...): [-]fields[maths][>strf_format][,alternate][|default]
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Resolve a dotted key path against info_dict; a leading '.' is ignored
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Evaluate one parsed field (the groupdict of an INTERNAL_FORMAT_RE match)
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                # NOTE: this local shadows the imported `operator` module inside get_value
                operator = None
                # Consume the expression left to right, alternating operator and operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a numeric literal or another field
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        # e.g. one of the operands is None: the field has no value
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                # '\,' in the format stands for a literal comma
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when a field has no value and no explicit default
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            # json.dumps fallback: serialize sets and LazyLists as lists
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            # Substitution callback for EXTERNAL_FORMAT_RE: computes the value of
            # one template field, stores it in TMPL_DICT and returns the
            # replacement field text
            if not outer_mobj.group('has_key'):
                # Not a %(key)X field: prefix the match with '%'
                return f'%{outer_mobj.group(0)}'
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            # Last component of the first field; passed to sanitize as the field name
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            # Try the field and then its alternates until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            # Only a key that is exactly one of the compat fields gets zero-padded
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            # The same format spec with its conversion type replaced by 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # Apply the format to the UTF-8 encoded value, then decode
                # again ignoring undecodable bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                opts = outer_mobj.group('conversion') or ''
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                # Single character: first character of the value, or fall
                # back to a string format for an empty value
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # The rewritten key embeds the original format and separates the parts
            # with NUL characters (escape_outtmpl has a lookahead on '\0')
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1096
1097     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1098         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1099         return self.escape_outtmpl(outtmpl) % info_dict
1100
1101     def _prepare_filename(self, info_dict, tmpl_type='default'):
1102         try:
1103             sanitize = lambda k, v: sanitize_filename(
1104                 compat_str(v),
1105                 restricted=self.params.get('restrictfilenames'),
1106                 is_id=(k == 'id' or k.endswith('_id')))
1107             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
1108             outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
1109             outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
1110             filename = outtmpl % template_dict
1111
1112             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1113             if filename and force_ext is not None:
1114                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1115
1116             # https://github.com/blackjack4494/youtube-dlc/issues/85
1117             trim_file_name = self.params.get('trim_file_name', False)
1118             if trim_file_name:
1119                 fn_groups = filename.rsplit('.')
1120                 ext = fn_groups[-1]
1121                 sub_ext = ''
1122                 if len(fn_groups) > 2:
1123                     sub_ext = fn_groups[-2]
1124                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1125
1126             return filename
1127         except ValueError as err:
1128             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1129             return None
1130
1131     def prepare_filename(self, info_dict, dir_type='', warn=False):
1132         """Generate the output filename."""
1133
1134         filename = self._prepare_filename(info_dict, dir_type or 'default')
1135         if not filename and dir_type not in ('', 'temp'):
1136             return ''
1137
1138         if warn:
1139             if not self.params.get('paths'):
1140                 pass
1141             elif filename == '-':
1142                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1143             elif os.path.isabs(filename):
1144                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1145         if filename == '-' or not filename:
1146             return filename
1147
1148         return self.get_output_path(dir_type, filename)
1149
1150     def _match_entry(self, info_dict, incomplete=False, silent=False):
1151         """ Returns None if the file should be downloaded """
1152
1153         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1154
1155         def check_filter():
1156             if 'title' in info_dict:
1157                 # This can happen when we're just evaluating the playlist
1158                 title = info_dict['title']
1159                 matchtitle = self.params.get('matchtitle', False)
1160                 if matchtitle:
1161                     if not re.search(matchtitle, title, re.IGNORECASE):
1162                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1163                 rejecttitle = self.params.get('rejecttitle', False)
1164                 if rejecttitle:
1165                     if re.search(rejecttitle, title, re.IGNORECASE):
1166                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1167             date = info_dict.get('upload_date')
1168             if date is not None:
1169                 dateRange = self.params.get('daterange', DateRange())
1170                 if date not in dateRange:
1171                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1172             view_count = info_dict.get('view_count')
1173             if view_count is not None:
1174                 min_views = self.params.get('min_views')
1175                 if min_views is not None and view_count < min_views:
1176                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1177                 max_views = self.params.get('max_views')
1178                 if max_views is not None and view_count > max_views:
1179                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1180             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1181                 return 'Skipping "%s" because it is age restricted' % video_title
1182
1183             match_filter = self.params.get('match_filter')
1184             if match_filter is not None:
1185                 try:
1186                     ret = match_filter(info_dict, incomplete=incomplete)
1187                 except TypeError:
1188                     # For backward compatibility
1189                     ret = None if incomplete else match_filter(info_dict)
1190                 if ret is not None:
1191                     return ret
1192             return None
1193
1194         if self.in_download_archive(info_dict):
1195             reason = '%s has already been recorded in the archive' % video_title
1196             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1197         else:
1198             reason = check_filter()
1199             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1200         if reason is not None:
1201             if not silent:
1202                 self.to_screen('[download] ' + reason)
1203             if self.params.get(break_opt, False):
1204                 raise break_err()
1205         return reason
1206
1207     @staticmethod
1208     def add_extra_info(info_dict, extra_info):
1209         '''Set the keys from extra_info in info dict if they are missing'''
1210         for key, value in extra_info.items():
1211             info_dict.setdefault(key, value)
1212
1213     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1214                      process=True, force_generic_extractor=False):
1215         """
1216         Return a list with a dictionary for each video extracted.
1217
1218         Arguments:
1219         url -- URL to extract
1220
1221         Keyword arguments:
1222         download -- whether to download videos during extraction
1223         ie_key -- extractor key hint
1224         extra_info -- dictionary containing the extra values to add to each result
1225         process -- whether to resolve all unresolved references (URLs, playlist items),
1226             must be True for download to work.
1227         force_generic_extractor -- force using the generic extractor
1228         """
1229
1230         if extra_info is None:
1231             extra_info = {}
1232
1233         if not ie_key and force_generic_extractor:
1234             ie_key = 'Generic'
1235
1236         if ie_key:
1237             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1238         else:
1239             ies = self._ies
1240
1241         for ie_key, ie in ies.items():
1242             if not ie.suitable(url):
1243                 continue
1244
1245             if not ie.working():
1246                 self.report_warning('The program functionality for this site has been marked as broken, '
1247                                     'and will probably not work.')
1248
1249             temp_id = ie.get_temp_id(url)
1250             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1251                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1252                                ie_key, temp_id))
1253                 break
1254             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1255         else:
1256             self.report_error('no suitable InfoExtractor for URL %s' % url)
1257
1258     def __handle_extraction_exceptions(func):
1259         @functools.wraps(func)
1260         def wrapper(self, *args, **kwargs):
1261             try:
1262                 return func(self, *args, **kwargs)
1263             except GeoRestrictedError as e:
1264                 msg = e.msg
1265                 if e.countries:
1266                     msg += '\nThis video is available in %s.' % ', '.join(
1267                         map(ISO3166Utils.short2full, e.countries))
1268                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1269                 self.report_error(msg)
1270             except ExtractorError as e:  # An error we somewhat expected
1271                 self.report_error(compat_str(e), e.format_traceback())
1272             except ThrottledDownload:
1273                 self.to_stderr('\r')
1274                 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1275                 return wrapper(self, *args, **kwargs)
1276             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
1277                 raise
1278             except Exception as e:
1279                 if self.params.get('ignoreerrors'):
1280                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1281                 else:
1282                     raise
1283         return wrapper
1284
1285     @__handle_extraction_exceptions
1286     def __extract_info(self, url, ie, download, extra_info, process):
1287         ie_result = ie.extract(url)
1288         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1289             return
1290         if isinstance(ie_result, list):
1291             # Backwards compatibility: old IE result format
1292             ie_result = {
1293                 '_type': 'compat_list',
1294                 'entries': ie_result,
1295             }
1296         if extra_info.get('original_url'):
1297             ie_result.setdefault('original_url', extra_info['original_url'])
1298         self.add_default_extra_info(ie_result, ie, url)
1299         if process:
1300             return self.process_ie_result(ie_result, download, extra_info)
1301         else:
1302             return ie_result
1303
1304     def add_default_extra_info(self, ie_result, ie, url):
1305         if url is not None:
1306             self.add_extra_info(ie_result, {
1307                 'webpage_url': url,
1308                 'original_url': url,
1309                 'webpage_url_basename': url_basename(url),
1310             })
1311         if ie is not None:
1312             self.add_extra_info(ie_result, {
1313                 'extractor': ie.IE_NAME,
1314                 'extractor_key': ie.ie_key(),
1315             })
1316
1317     def process_ie_result(self, ie_result, download=True, extra_info=None):
1318         """
1319         Take the result of the ie(may be modified) and resolve all unresolved
1320         references (URLs, playlist items).
1321
1322         It will also download the videos if 'download'.
1323         Returns the resolved ie_result.
1324         """
1325         if extra_info is None:
1326             extra_info = {}
1327         result_type = ie_result.get('_type', 'video')
1328
1329         if result_type in ('url', 'url_transparent'):
1330             ie_result['url'] = sanitize_url(ie_result['url'])
1331             if ie_result.get('original_url'):
1332                 extra_info.setdefault('original_url', ie_result['original_url'])
1333
1334             extract_flat = self.params.get('extract_flat', False)
1335             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1336                     or extract_flat is True):
1337                 info_copy = ie_result.copy()
1338                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1339                 if ie and not ie_result.get('id'):
1340                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1341                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1342                 self.add_extra_info(info_copy, extra_info)
1343                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1344                 if self.params.get('force_write_download_archive', False):
1345                     self.record_download_archive(info_copy)
1346                 return ie_result
1347
1348         if result_type == 'video':
1349             self.add_extra_info(ie_result, extra_info)
1350             ie_result = self.process_video_result(ie_result, download=download)
1351             additional_urls = (ie_result or {}).get('additional_urls')
1352             if additional_urls:
1353                 # TODO: Improve MetadataParserPP to allow setting a list
1354                 if isinstance(additional_urls, compat_str):
1355                     additional_urls = [additional_urls]
1356                 self.to_screen(
1357                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1358                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1359                 ie_result['additional_entries'] = [
1360                     self.extract_info(
1361                         url, download, extra_info,
1362                         force_generic_extractor=self.params.get('force_generic_extractor'))
1363                     for url in additional_urls
1364                 ]
1365             return ie_result
1366         elif result_type == 'url':
1367             # We have to add extra_info to the results because it may be
1368             # contained in a playlist
1369             return self.extract_info(
1370                 ie_result['url'], download,
1371                 ie_key=ie_result.get('ie_key'),
1372                 extra_info=extra_info)
1373         elif result_type == 'url_transparent':
1374             # Use the information from the embedding page
1375             info = self.extract_info(
1376                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1377                 extra_info=extra_info, download=False, process=False)
1378
1379             # extract_info may return None when ignoreerrors is enabled and
1380             # extraction failed with an error, don't crash and return early
1381             # in this case
1382             if not info:
1383                 return info
1384
1385             force_properties = dict(
1386                 (k, v) for k, v in ie_result.items() if v is not None)
1387             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1388                 if f in force_properties:
1389                     del force_properties[f]
1390             new_result = info.copy()
1391             new_result.update(force_properties)
1392
1393             # Extracted info may not be a video result (i.e.
1394             # info.get('_type', 'video') != video) but rather an url or
1395             # url_transparent. In such cases outer metadata (from ie_result)
1396             # should be propagated to inner one (info). For this to happen
1397             # _type of info should be overridden with url_transparent. This
1398             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1399             if new_result.get('_type') == 'url':
1400                 new_result['_type'] = 'url_transparent'
1401
1402             return self.process_ie_result(
1403                 new_result, download=download, extra_info=extra_info)
1404         elif result_type in ('playlist', 'multi_video'):
1405             # Protect from infinite recursion due to recursively nested playlists
1406             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1407             webpage_url = ie_result['webpage_url']
1408             if webpage_url in self._playlist_urls:
1409                 self.to_screen(
1410                     '[download] Skipping already downloaded playlist: %s'
1411                     % ie_result.get('title') or ie_result.get('id'))
1412                 return
1413
1414             self._playlist_level += 1
1415             self._playlist_urls.add(webpage_url)
1416             self._sanitize_thumbnails(ie_result)
1417             try:
1418                 return self.__process_playlist(ie_result, download)
1419             finally:
1420                 self._playlist_level -= 1
1421                 if not self._playlist_level:
1422                     self._playlist_urls.clear()
1423         elif result_type == 'compat_list':
1424             self.report_warning(
1425                 'Extractor %s returned a compat_list result. '
1426                 'It needs to be updated.' % ie_result.get('extractor'))
1427
1428             def _fixup(r):
1429                 self.add_extra_info(r, {
1430                     'extractor': ie_result['extractor'],
1431                     'webpage_url': ie_result['webpage_url'],
1432                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1433                     'extractor_key': ie_result['extractor_key'],
1434                 })
1435                 return r
1436             ie_result['entries'] = [
1437                 self.process_ie_result(_fixup(r), download, extra_info)
1438                 for r in ie_result['entries']
1439             ]
1440             return ie_result
1441         else:
1442             raise Exception('Invalid result type: %s' % result_type)
1443
1444     def _ensure_dir_exists(self, path):
1445         return make_dir(path, self.report_error)
1446
1447     def __process_playlist(self, ie_result, download):
1448         # We process each entry in the playlist
1449         playlist = ie_result.get('title') or ie_result.get('id')
1450         self.to_screen('[download] Downloading playlist: %s' % playlist)
1451
1452         if 'entries' not in ie_result:
1453             raise EntryNotInPlaylist()
1454         incomplete_entries = bool(ie_result.get('requested_entries'))
1455         if incomplete_entries:
1456             def fill_missing_entries(entries, indexes):
1457                 ret = [None] * max(*indexes)
1458                 for i, entry in zip(indexes, entries):
1459                     ret[i - 1] = entry
1460                 return ret
1461             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1462
1463         playlist_results = []
1464
1465         playliststart = self.params.get('playliststart', 1)
1466         playlistend = self.params.get('playlistend')
1467         # For backwards compatibility, interpret -1 as whole list
1468         if playlistend == -1:
1469             playlistend = None
1470
1471         playlistitems_str = self.params.get('playlist_items')
1472         playlistitems = None
1473         if playlistitems_str is not None:
1474             def iter_playlistitems(format):
1475                 for string_segment in format.split(','):
1476                     if '-' in string_segment:
1477                         start, end = string_segment.split('-')
1478                         for item in range(int(start), int(end) + 1):
1479                             yield int(item)
1480                     else:
1481                         yield int(string_segment)
1482             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1483
1484         ie_entries = ie_result['entries']
1485         msg = (
1486             'Downloading %d videos' if not isinstance(ie_entries, list)
1487             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1488
1489         if isinstance(ie_entries, list):
1490             def get_entry(i):
1491                 return ie_entries[i - 1]
1492         else:
1493             if not isinstance(ie_entries, PagedList):
1494                 ie_entries = LazyList(ie_entries)
1495
1496             def get_entry(i):
1497                 return YoutubeDL.__handle_extraction_exceptions(
1498                     lambda self, i: ie_entries[i - 1]
1499                 )(self, i)
1500
1501         entries = []
1502         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1503         for i in items:
1504             if i == 0:
1505                 continue
1506             if playlistitems is None and playlistend is not None and playlistend < i:
1507                 break
1508             entry = None
1509             try:
1510                 entry = get_entry(i)
1511                 if entry is None:
1512                     raise EntryNotInPlaylist()
1513             except (IndexError, EntryNotInPlaylist):
1514                 if incomplete_entries:
1515                     raise EntryNotInPlaylist()
1516                 elif not playlistitems:
1517                     break
1518             entries.append(entry)
1519             try:
1520                 if entry is not None:
1521                     self._match_entry(entry, incomplete=True, silent=True)
1522             except (ExistingVideoReached, RejectedVideoReached):
1523                 break
1524         ie_result['entries'] = entries
1525
1526         # Save playlist_index before re-ordering
1527         entries = [
1528             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1529             for i, entry in enumerate(entries, 1)
1530             if entry is not None]
1531         n_entries = len(entries)
1532
1533         if not playlistitems and (playliststart or playlistend):
1534             playlistitems = list(range(playliststart, playliststart + n_entries))
1535         ie_result['requested_entries'] = playlistitems
1536
1537         if self.params.get('allow_playlist_files', True):
1538             ie_copy = {
1539                 'playlist': playlist,
1540                 'playlist_id': ie_result.get('id'),
1541                 'playlist_title': ie_result.get('title'),
1542                 'playlist_uploader': ie_result.get('uploader'),
1543                 'playlist_uploader_id': ie_result.get('uploader_id'),
1544                 'playlist_index': 0,
1545             }
1546             ie_copy.update(dict(ie_result))
1547
1548             if self._write_info_json('playlist', ie_result,
1549                                      self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1550                 return
1551             if self._write_description('playlist', ie_result,
1552                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1553                 return
1554             # TODO: This should be passed to ThumbnailsConvertor if necessary
1555             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1556
1557         if self.params.get('playlistreverse', False):
1558             entries = entries[::-1]
1559         if self.params.get('playlistrandom', False):
1560             random.shuffle(entries)
1561
1562         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1563
1564         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1565         failures = 0
1566         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1567         for i, entry_tuple in enumerate(entries, 1):
1568             playlist_index, entry = entry_tuple
1569             if 'playlist-index' in self.params.get('compat_opts', []):
1570                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1571             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1572             # This __x_forwarded_for_ip thing is a bit ugly but requires
1573             # minimal changes
1574             if x_forwarded_for:
1575                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1576             extra = {
1577                 'n_entries': n_entries,
1578                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1579                 'playlist_index': playlist_index,
1580                 'playlist_autonumber': i,
1581                 'playlist': playlist,
1582                 'playlist_id': ie_result.get('id'),
1583                 'playlist_title': ie_result.get('title'),
1584                 'playlist_uploader': ie_result.get('uploader'),
1585                 'playlist_uploader_id': ie_result.get('uploader_id'),
1586                 'extractor': ie_result['extractor'],
1587                 'webpage_url': ie_result['webpage_url'],
1588                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1589                 'extractor_key': ie_result['extractor_key'],
1590             }
1591
1592             if self._match_entry(entry, incomplete=True) is not None:
1593                 continue
1594
1595             entry_result = self.__process_iterable_entry(entry, download, extra)
1596             if not entry_result:
1597                 failures += 1
1598             if failures >= max_failures:
1599                 self.report_error(
1600                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1601                 break
1602             # TODO: skip failed (empty) entries?
1603             playlist_results.append(entry_result)
1604         ie_result['entries'] = playlist_results
1605         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1606         return ie_result
1607
1608     @__handle_extraction_exceptions
1609     def __process_iterable_entry(self, entry, download, extra_info):
1610         return self.process_ie_result(
1611             entry, download=download, extra_info=extra_info)
1612
1613     def _build_format_filter(self, filter_spec):
1614         " Returns a function to filter the formats according to the filter_spec "
1615
1616         OPERATORS = {
1617             '<': operator.lt,
1618             '<=': operator.le,
1619             '>': operator.gt,
1620             '>=': operator.ge,
1621             '=': operator.eq,
1622             '!=': operator.ne,
1623         }
1624         operator_rex = re.compile(r'''(?x)\s*
1625             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1626             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1627             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1628             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1629         m = operator_rex.fullmatch(filter_spec)
1630         if m:
1631             try:
1632                 comparison_value = int(m.group('value'))
1633             except ValueError:
1634                 comparison_value = parse_filesize(m.group('value'))
1635                 if comparison_value is None:
1636                     comparison_value = parse_filesize(m.group('value') + 'B')
1637                 if comparison_value is None:
1638                     raise ValueError(
1639                         'Invalid value %r in format specification %r' % (
1640                             m.group('value'), filter_spec))
1641             op = OPERATORS[m.group('op')]
1642
1643         if not m:
1644             STR_OPERATORS = {
1645                 '=': operator.eq,
1646                 '^=': lambda attr, value: attr.startswith(value),
1647                 '$=': lambda attr, value: attr.endswith(value),
1648                 '*=': lambda attr, value: value in attr,
1649             }
1650             str_operator_rex = re.compile(r'''(?x)\s*
1651                 (?P<key>[a-zA-Z0-9._-]+)\s*
1652                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1653                 (?P<value>[a-zA-Z0-9._-]+)\s*
1654                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1655             m = str_operator_rex.fullmatch(filter_spec)
1656             if m:
1657                 comparison_value = m.group('value')
1658                 str_op = STR_OPERATORS[m.group('op')]
1659                 if m.group('negation'):
1660                     op = lambda attr, value: not str_op(attr, value)
1661                 else:
1662                     op = str_op
1663
1664         if not m:
1665             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1666
1667         def _filter(f):
1668             actual_value = f.get(m.group('key'))
1669             if actual_value is None:
1670                 return m.group('none_inclusive')
1671             return op(actual_value, comparison_value)
1672         return _filter
1673
1674     def _default_format_spec(self, info_dict, download=True):
1675
1676         def can_merge():
1677             merger = FFmpegMergerPP(self)
1678             return merger.available and merger.can_merge()
1679
1680         prefer_best = (
1681             not self.params.get('simulate')
1682             and download
1683             and (
1684                 not can_merge()
1685                 or info_dict.get('is_live', False)
1686                 or self.outtmpl_dict['default'] == '-'))
1687         compat = (
1688             prefer_best
1689             or self.params.get('allow_multiple_audio_streams', False)
1690             or 'format-spec' in self.params.get('compat_opts', []))
1691
1692         return (
1693             'best/bestvideo+bestaudio' if prefer_best
1694             else 'bestvideo*+bestaudio/best' if not compat
1695             else 'bestvideo+bestaudio/best')
1696
1697     def build_format_selector(self, format_spec):
1698         def syntax_error(note, start):
1699             message = (
1700                 'Invalid format specification: '
1701                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1702             return SyntaxError(message)
1703
1704         PICKFIRST = 'PICKFIRST'
1705         MERGE = 'MERGE'
1706         SINGLE = 'SINGLE'
1707         GROUP = 'GROUP'
1708         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1709
1710         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1711                                   'video': self.params.get('allow_multiple_video_streams', False)}
1712
1713         check_formats = self.params.get('check_formats')
1714
1715         def _parse_filter(tokens):
1716             filter_parts = []
1717             for type, string, start, _, _ in tokens:
1718                 if type == tokenize.OP and string == ']':
1719                     return ''.join(filter_parts)
1720                 else:
1721                     filter_parts.append(string)
1722
1723         def _remove_unused_ops(tokens):
1724             # Remove operators that we don't use and join them with the surrounding strings
1725             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1726             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1727             last_string, last_start, last_end, last_line = None, None, None, None
1728             for type, string, start, end, line in tokens:
1729                 if type == tokenize.OP and string == '[':
1730                     if last_string:
1731                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1732                         last_string = None
1733                     yield type, string, start, end, line
1734                     # everything inside brackets will be handled by _parse_filter
1735                     for type, string, start, end, line in tokens:
1736                         yield type, string, start, end, line
1737                         if type == tokenize.OP and string == ']':
1738                             break
1739                 elif type == tokenize.OP and string in ALLOWED_OPS:
1740                     if last_string:
1741                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1742                         last_string = None
1743                     yield type, string, start, end, line
1744                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1745                     if not last_string:
1746                         last_string = string
1747                         last_start = start
1748                         last_end = end
1749                     else:
1750                         last_string += string
1751             if last_string:
1752                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1753
1754         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1755             selectors = []
1756             current_selector = None
1757             for type, string, start, _, _ in tokens:
1758                 # ENCODING is only defined in python 3.x
1759                 if type == getattr(tokenize, 'ENCODING', None):
1760                     continue
1761                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1762                     current_selector = FormatSelector(SINGLE, string, [])
1763                 elif type == tokenize.OP:
1764                     if string == ')':
1765                         if not inside_group:
1766                             # ')' will be handled by the parentheses group
1767                             tokens.restore_last_token()
1768                         break
1769                     elif inside_merge and string in ['/', ',']:
1770                         tokens.restore_last_token()
1771                         break
1772                     elif inside_choice and string == ',':
1773                         tokens.restore_last_token()
1774                         break
1775                     elif string == ',':
1776                         if not current_selector:
1777                             raise syntax_error('"," must follow a format selector', start)
1778                         selectors.append(current_selector)
1779                         current_selector = None
1780                     elif string == '/':
1781                         if not current_selector:
1782                             raise syntax_error('"/" must follow a format selector', start)
1783                         first_choice = current_selector
1784                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1785                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1786                     elif string == '[':
1787                         if not current_selector:
1788                             current_selector = FormatSelector(SINGLE, 'best', [])
1789                         format_filter = _parse_filter(tokens)
1790                         current_selector.filters.append(format_filter)
1791                     elif string == '(':
1792                         if current_selector:
1793                             raise syntax_error('Unexpected "("', start)
1794                         group = _parse_format_selection(tokens, inside_group=True)
1795                         current_selector = FormatSelector(GROUP, group, [])
1796                     elif string == '+':
1797                         if not current_selector:
1798                             raise syntax_error('Unexpected "+"', start)
1799                         selector_1 = current_selector
1800                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1801                         if not selector_2:
1802                             raise syntax_error('Expected a selector', start)
1803                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1804                     else:
1805                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1806                 elif type == tokenize.ENDMARKER:
1807                     break
1808             if current_selector:
1809                 selectors.append(current_selector)
1810             return selectors
1811
1812         def _merge(formats_pair):
1813             format_1, format_2 = formats_pair
1814
1815             formats_info = []
1816             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1817             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1818
1819             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1820                 get_no_more = {'video': False, 'audio': False}
1821                 for (i, fmt_info) in enumerate(formats_info):
1822                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1823                         formats_info.pop(i)
1824                         continue
1825                     for aud_vid in ['audio', 'video']:
1826                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1827                             if get_no_more[aud_vid]:
1828                                 formats_info.pop(i)
1829                                 break
1830                             get_no_more[aud_vid] = True
1831
1832             if len(formats_info) == 1:
1833                 return formats_info[0]
1834
1835             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1836             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1837
1838             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1839             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1840
1841             output_ext = self.params.get('merge_output_format')
1842             if not output_ext:
1843                 if the_only_video:
1844                     output_ext = the_only_video['ext']
1845                 elif the_only_audio and not video_fmts:
1846                     output_ext = the_only_audio['ext']
1847                 else:
1848                     output_ext = 'mkv'
1849
1850             new_dict = {
1851                 'requested_formats': formats_info,
1852                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1853                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1854                 'ext': output_ext,
1855             }
1856
1857             if the_only_video:
1858                 new_dict.update({
1859                     'width': the_only_video.get('width'),
1860                     'height': the_only_video.get('height'),
1861                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1862                     'fps': the_only_video.get('fps'),
1863                     'vcodec': the_only_video.get('vcodec'),
1864                     'vbr': the_only_video.get('vbr'),
1865                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1866                 })
1867
1868             if the_only_audio:
1869                 new_dict.update({
1870                     'acodec': the_only_audio.get('acodec'),
1871                     'abr': the_only_audio.get('abr'),
1872                 })
1873
1874             return new_dict
1875
1876         def _check_formats(formats):
1877             if not check_formats:
1878                 yield from formats
1879                 return
1880             for f in formats:
1881                 self.to_screen('[info] Testing format %s' % f['format_id'])
1882                 temp_file = tempfile.NamedTemporaryFile(
1883                     suffix='.tmp', delete=False,
1884                     dir=self.get_output_path('temp') or None)
1885                 temp_file.close()
1886                 try:
1887                     success, _ = self.dl(temp_file.name, f, test=True)
1888                 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1889                     success = False
1890                 finally:
1891                     if os.path.exists(temp_file.name):
1892                         try:
1893                             os.remove(temp_file.name)
1894                         except OSError:
1895                             self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1896                 if success:
1897                     yield f
1898                 else:
1899                     self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1900
        def _build_selector_function(selector):
            """Turn a parsed format selector into a callable taking a context.

            `selector` is either a list of selectors or an object exposing
            `type`, `selector` and `filters`. The returned callable accepts a
            `ctx` dict (this code reads its 'formats' and 'incomplete_formats'
            keys) and produces the selected formats as an iterable.

            GROUP, PICKFIRST, MERGE and SINGLE, as well as `_merge` and
            `_check_formats`, come from the enclosing scope.
            """
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                # Concatenate the results of every sub-selector, in order
                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                # NOTE: this branch returns directly, so the `filters` wrapping
                # at the bottom is not applied to a list of selectors
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                # The first alternative that selects anything wins;
                # an empty list is returned when none of them does
                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                # Merge every combination of a format from the left side with a
                # format from the right side; each side works on its own deep
                # copy of the context
                def selector_function(ctx):
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    # Fold all checked formats into a single merged format,
                    # starting from the last one and walking towards the first
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    # Defaults used when format_spec is not a best/worst expression
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # best/worst (or b/w), optional video/audio (or v/a) type,
                    # optional '*' modifier and optional 1-based '.N' rank
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        # 'best' picks from the end of the formats list, 'worst' from the start
                        format_reverse = mobj.group('bw')[0] == 'b'
                        # First letter of the requested type ('v' or 'a'), or None
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        # Formats with neither a video nor an audio codec never match
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        if format_spec in ('m4a', 'mp3', 'ogg', 'aac'):  # audio extension
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in ('mp4', 'flv', 'webm', '3gp'):  # video extension
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in ('mhtml', ):  # storyboards extension
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        # Checked lazily, so only as many candidates are tested
                        # as are needed to reach the requested rank
                        # (presumably; depends on LazyList semantics - TODO confirm)
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            # Fewer usable candidates than the requested rank: select nothing
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            # Apply the selector's own filters to a private copy of the context
            # before handing it to the selector function built above
            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
2003
2004         stream = io.BytesIO(format_spec.encode('utf-8'))
2005         try:
2006             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2007         except tokenize.TokenError:
2008             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2009
2010         class TokenIterator(object):
2011             def __init__(self, tokens):
2012                 self.tokens = tokens
2013                 self.counter = 0
2014
2015             def __iter__(self):
2016                 return self
2017
2018             def __next__(self):
2019                 if self.counter >= len(self.tokens):
2020                     raise StopIteration()
2021                 value = self.tokens[self.counter]
2022                 self.counter += 1
2023                 return value
2024
2025             next = __next__
2026
2027             def restore_last_token(self):
2028                 self.counter -= 1
2029
2030         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2031         return _build_selector_function(parsed_selector)
2032
2033     def _calc_headers(self, info_dict):
2034         res = std_headers.copy()
2035
2036         add_headers = info_dict.get('http_headers')
2037         if add_headers:
2038             res.update(add_headers)
2039
2040         cookies = self._calc_cookies(info_dict)
2041         if cookies:
2042             res['Cookie'] = cookies
2043
2044         if 'X-Forwarded-For' not in res:
2045             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2046             if x_forwarded_for_ip:
2047                 res['X-Forwarded-For'] = x_forwarded_for_ip
2048
2049         return res
2050
2051     def _calc_cookies(self, info_dict):
2052         pr = sanitized_Request(info_dict['url'])
2053         self.cookiejar.add_cookie_header(pr)
2054         return pr.get_header('Cookie')
2055
2056     def _sanitize_thumbnails(self, info_dict):
2057         thumbnails = info_dict.get('thumbnails')
2058         if thumbnails is None:
2059             thumbnail = info_dict.get('thumbnail')
2060             if thumbnail:
2061                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2062         if thumbnails:
2063             thumbnails.sort(key=lambda t: (
2064                 t.get('preference') if t.get('preference') is not None else -1,
2065                 t.get('width') if t.get('width') is not None else -1,
2066                 t.get('height') if t.get('height') is not None else -1,
2067                 t.get('id') if t.get('id') is not None else '',
2068                 t.get('url')))
2069
2070             def thumbnail_tester():
2071                 if self.params.get('check_formats'):
2072                     test_all = True
2073                     to_screen = lambda msg: self.to_screen(f'[info] {msg}')
2074                 else:
2075                     test_all = False
2076                     to_screen = self.write_debug
2077
2078                 def test_thumbnail(t):
2079                     if not test_all and not t.get('_test_url'):
2080                         return True
2081                     to_screen('Testing thumbnail %s' % t['id'])
2082                     try:
2083                         self.urlopen(HEADRequest(t['url']))
2084                     except network_exceptions as err:
2085                         to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2086                             t['id'], t['url'], error_to_compat_str(err)))
2087                         return False
2088                     return True
2089
2090                 return test_thumbnail
2091
2092             for i, t in enumerate(thumbnails):
2093                 if t.get('id') is None:
2094                     t['id'] = '%d' % i
2095                 if t.get('width') and t.get('height'):
2096                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
2097                 t['url'] = sanitize_url(t['url'])
2098
2099             if self.params.get('check_formats') is not False:
2100                 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2101             else:
2102                 info_dict['thumbnails'] = thumbnails
2103
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video result, select its format(s) and process them.

        info_dict is modified in place: required fields are validated,
        malformed values are coerced, and derived fields (thumbnail,
        display_id, dates, live status, chapter/season/episode titles,
        subtitle and format metadata) are filled in. Afterwards the requested
        listings are printed or format selection is run, and each selected
        format is handed to process_info when `download` is true.

        Returns the info_dict updated with the last selected format, or None
        when only listings were requested and 'simulate' was not set.
        Raises ExtractorError when 'id' or 'title' is missing, or when no
        format matches and 'ignore_no_formats_error' is not set.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        # Coerce a non-string value of `string_field` to a string, with a warning
        def sanitize_string_field(info, string_field):
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        # Coerce non-numeric values of the known numeric fields, with a warning
        def sanitize_numeric_fields(info):
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Prefer an explicit 'thumbnail'; otherwise use the last entry of 'thumbnails'
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive the YYYYMMDD date fields from their timestamps when missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            # Skip keys that are explicitly False and stop at the first one
            # that is not; it becomes the status only if its value is truthy
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            # Fill in the boolean flags that were left unset
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and derive a missing 'ext' from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Remember whether any format is flagged 'has_drm' before such formats
        # are dropped (they are kept only with 'allow_unplayable_formats')
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the list of formats carrying it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                # Disambiguate duplicates by appending their index
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        # Listing alone ends processing only when 'simulate' was left unset (None)
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            # Each selected format is processed on its own copy of info_dict
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2365
2366     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2367         """Select the requested subtitles and their format"""
2368         available_subs = {}
2369         if normal_subtitles and self.params.get('writesubtitles'):
2370             available_subs.update(normal_subtitles)
2371         if automatic_captions and self.params.get('writeautomaticsub'):
2372             for lang, cap_info in automatic_captions.items():
2373                 if lang not in available_subs:
2374                     available_subs[lang] = cap_info
2375
2376         if (not self.params.get('writesubtitles') and not
2377                 self.params.get('writeautomaticsub') or not
2378                 available_subs):
2379             return None
2380
2381         all_sub_langs = available_subs.keys()
2382         if self.params.get('allsubtitles', False):
2383             requested_langs = all_sub_langs
2384         elif self.params.get('subtitleslangs', False):
2385             # A list is used so that the order of languages will be the same as
2386             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2387             requested_langs = []
2388             for lang_re in self.params.get('subtitleslangs'):
2389                 if lang_re == 'all':
2390                     requested_langs.extend(all_sub_langs)
2391                     continue
2392                 discard = lang_re[0] == '-'
2393                 if discard:
2394                     lang_re = lang_re[1:]
2395                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2396                 if discard:
2397                     for lang in current_langs:
2398                         while lang in requested_langs:
2399                             requested_langs.remove(lang)
2400                 else:
2401                     requested_langs.extend(current_langs)
2402             requested_langs = orderedSet(requested_langs)
2403         elif 'en' in available_subs:
2404             requested_langs = ['en']
2405         else:
2406             requested_langs = [list(all_sub_langs)[0]]
2407         if requested_langs:
2408             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2409
2410         formats_query = self.params.get('subtitlesformat', 'best')
2411         formats_preference = formats_query.split('/') if formats_query else []
2412         subs = {}
2413         for lang in requested_langs:
2414             formats = available_subs.get(lang)
2415             if formats is None:
2416                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2417                 continue
2418             for ext in formats_preference:
2419                 if ext == 'best':
2420                     f = formats[-1]
2421                     break
2422                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2423                 if matches:
2424                     f = matches[-1]
2425                     break
2426             else:
2427                 f = formats[-1]
2428                 self.report_warning(
2429                     'No subtitle format found matching "%s" for language %s, '
2430                     'using %s' % (formats_query, lang, f['ext']))
2431             subs[lang] = f
2432         return subs
2433
2434     def __forced_printings(self, info_dict, filename, incomplete):
2435         def print_mandatory(field, actual_field=None):
2436             if actual_field is None:
2437                 actual_field = field
2438             if (self.params.get('force%s' % field, False)
2439                     and (not incomplete or info_dict.get(actual_field) is not None)):
2440                 self.to_stdout(info_dict[actual_field])
2441
2442         def print_optional(field):
2443             if (self.params.get('force%s' % field, False)
2444                     and info_dict.get(field) is not None):
2445                 self.to_stdout(info_dict[field])
2446
2447         info_dict = info_dict.copy()
2448         if filename is not None:
2449             info_dict['filename'] = filename
2450         if info_dict.get('requested_formats') is not None:
2451             # For RTMP URLs, also include the playpath
2452             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2453         elif 'url' in info_dict:
2454             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2455
2456         if self.params.get('forceprint') or self.params.get('forcejson'):
2457             self.post_extract(info_dict)
2458         for tmpl in self.params.get('forceprint', []):
2459             mobj = re.match(r'\w+(=?)$', tmpl)
2460             if mobj and mobj.group(1):
2461                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2462             elif mobj:
2463                 tmpl = '%({})s'.format(tmpl)
2464             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2465
2466         print_mandatory('title')
2467         print_mandatory('id')
2468         print_mandatory('url', 'urls')
2469         print_optional('thumbnail')
2470         print_optional('description')
2471         print_optional('filename')
2472         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2473             self.to_stdout(formatSeconds(info_dict['duration']))
2474         print_mandatory('format')
2475
2476         if self.params.get('forcejson'):
2477             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2478
2479     def dl(self, name, info, subtitle=False, test=False):
2480         if not info.get('url'):
2481             self.raise_no_formats(info, True)
2482
2483         if test:
2484             verbose = self.params.get('verbose')
2485             params = {
2486                 'test': True,
2487                 'quiet': not verbose,
2488                 'verbose': verbose,
2489                 'noprogress': not verbose,
2490                 'nopart': True,
2491                 'skip_unavailable_fragments': False,
2492                 'keep_fragments': False,
2493                 'overwrites': True,
2494                 '_no_ytdl_file': True,
2495             }
2496         else:
2497             params = self.params
2498         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2499         if not test:
2500             for ph in self._progress_hooks:
2501                 fd.add_progress_hook(ph)
2502             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2503             self.write_debug('Invoking downloader on "%s"' % urls)
2504         new_info = dict(info)
2505         if new_info.get('http_headers') is None:
2506             new_info['http_headers'] = self._calc_headers(new_info)
2507         return fd.download(name, new_info, subtitle)
2508
2509     def process_info(self, info_dict):
2510         """Process a single resolved IE result."""
2511
2512         assert info_dict.get('_type', 'video') == 'video'
2513
2514         max_downloads = self.params.get('max_downloads')
2515         if max_downloads is not None:
2516             if self._num_downloads >= int(max_downloads):
2517                 raise MaxDownloadsReached()
2518
2519         # TODO: backward compatibility, to be removed
2520         info_dict['fulltitle'] = info_dict['title']
2521
2522         if 'format' not in info_dict and 'ext' in info_dict:
2523             info_dict['format'] = info_dict['ext']
2524
2525         if self._match_entry(info_dict) is not None:
2526             return
2527
2528         self.post_extract(info_dict)
2529         self._num_downloads += 1
2530
2531         # info_dict['_filename'] needs to be set for backward compatibility
2532         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2533         temp_filename = self.prepare_filename(info_dict, 'temp')
2534         files_to_move = {}
2535
2536         # Forced printings
2537         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2538
2539         if self.params.get('simulate'):
2540             if self.params.get('force_write_download_archive', False):
2541                 self.record_download_archive(info_dict)
2542             # Do nothing else if in simulate mode
2543             return
2544
2545         if full_filename is None:
2546             return
2547         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2548             return
2549         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2550             return
2551
2552         if self._write_description('video', info_dict,
2553                                    self.prepare_filename(info_dict, 'description')) is None:
2554             return
2555
2556         sub_files = self._write_subtitles(info_dict, temp_filename)
2557         if sub_files is None:
2558             return
2559         files_to_move.update(dict(sub_files))
2560
2561         thumb_files = self._write_thumbnails(
2562             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2563         if thumb_files is None:
2564             return
2565         files_to_move.update(dict(thumb_files))
2566
2567         infofn = self.prepare_filename(info_dict, 'infojson')
2568         _infojson_written = self._write_info_json('video', info_dict, infofn)
2569         if _infojson_written:
2570             info_dict['__infojson_filename'] = infofn
2571         elif _infojson_written is None:
2572             return
2573
2574         # Note: Annotations are deprecated
2575         annofn = None
2576         if self.params.get('writeannotations', False):
2577             annofn = self.prepare_filename(info_dict, 'annotation')
2578         if annofn:
2579             if not self._ensure_dir_exists(encodeFilename(annofn)):
2580                 return
2581             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2582                 self.to_screen('[info] Video annotations are already present')
2583             elif not info_dict.get('annotations'):
2584                 self.report_warning('There are no annotations to write.')
2585             else:
2586                 try:
2587                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2588                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2589                         annofile.write(info_dict['annotations'])
2590                 except (KeyError, TypeError):
2591                     self.report_warning('There are no annotations to write.')
2592                 except (OSError, IOError):
2593                     self.report_error('Cannot write annotations file: ' + annofn)
2594                     return
2595
2596         # Write internet shortcut files
2597         url_link = webloc_link = desktop_link = False
2598         if self.params.get('writelink', False):
2599             if sys.platform == "darwin":  # macOS.
2600                 webloc_link = True
2601             elif sys.platform.startswith("linux"):
2602                 desktop_link = True
2603             else:  # if sys.platform in ['win32', 'cygwin']:
2604                 url_link = True
2605         if self.params.get('writeurllink', False):
2606             url_link = True
2607         if self.params.get('writewebloclink', False):
2608             webloc_link = True
2609         if self.params.get('writedesktoplink', False):
2610             desktop_link = True
2611
2612         if url_link or webloc_link or desktop_link:
2613             if 'webpage_url' not in info_dict:
2614                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2615                 return
2616             ascii_url = iri_to_uri(info_dict['webpage_url'])
2617
2618         def _write_link_file(extension, template, newline, embed_filename):
2619             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2620             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2621                 self.to_screen('[info] Internet shortcut is already present')
2622             else:
2623                 try:
2624                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2625                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2626                         template_vars = {'url': ascii_url}
2627                         if embed_filename:
2628                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2629                         linkfile.write(template % template_vars)
2630                 except (OSError, IOError):
2631                     self.report_error('Cannot write internet shortcut ' + linkfn)
2632                     return False
2633             return True
2634
2635         if url_link:
2636             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2637                 return
2638         if webloc_link:
2639             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2640                 return
2641         if desktop_link:
2642             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2643                 return
2644
2645         try:
2646             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2647         except PostProcessingError as err:
2648             self.report_error('Preprocessing: %s' % str(err))
2649             return
2650
2651         must_record_download_archive = False
2652         if self.params.get('skip_download', False):
2653             info_dict['filepath'] = temp_filename
2654             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2655             info_dict['__files_to_move'] = files_to_move
2656             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2657         else:
2658             # Download
2659             info_dict.setdefault('__postprocessors', [])
2660             try:
2661
2662                 def existing_file(*filepaths):
2663                     ext = info_dict.get('ext')
2664                     final_ext = self.params.get('final_ext', ext)
2665                     existing_files = []
2666                     for file in orderedSet(filepaths):
2667                         if final_ext != ext:
2668                             converted = replace_extension(file, final_ext, ext)
2669                             if os.path.exists(encodeFilename(converted)):
2670                                 existing_files.append(converted)
2671                         if os.path.exists(encodeFilename(file)):
2672                             existing_files.append(file)
2673
2674                     if not existing_files or self.params.get('overwrites', False):
2675                         for file in orderedSet(existing_files):
2676                             self.report_file_delete(file)
2677                             os.remove(encodeFilename(file))
2678                         return None
2679
2680                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2681                     return existing_files[0]
2682
2683                 success = True
2684                 if info_dict.get('requested_formats') is not None:
2685
2686                     def compatible_formats(formats):
2687                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2688                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2689                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2690                         if len(video_formats) > 2 or len(audio_formats) > 2:
2691                             return False
2692
2693                         # Check extension
2694                         exts = set(format.get('ext') for format in formats)
2695                         COMPATIBLE_EXTS = (
2696                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2697                             set(('webm',)),
2698                         )
2699                         for ext_sets in COMPATIBLE_EXTS:
2700                             if ext_sets.issuperset(exts):
2701                                 return True
2702                         # TODO: Check acodec/vcodec
2703                         return False
2704
2705                     requested_formats = info_dict['requested_formats']
2706                     old_ext = info_dict['ext']
2707                     if self.params.get('merge_output_format') is None:
2708                         if not compatible_formats(requested_formats):
2709                             info_dict['ext'] = 'mkv'
2710                             self.report_warning(
2711                                 'Requested formats are incompatible for merge and will be merged into mkv')
2712                         if (info_dict['ext'] == 'webm'
2713                                 and info_dict.get('thumbnails')
2714                                 # check with type instead of pp_key, __name__, or isinstance
2715                                 # since we dont want any custom PPs to trigger this
2716                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2717                             info_dict['ext'] = 'mkv'
2718                             self.report_warning(
2719                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2720                     new_ext = info_dict['ext']
2721
2722                     def correct_ext(filename, ext=new_ext):
2723                         if filename == '-':
2724                             return filename
2725                         filename_real_ext = os.path.splitext(filename)[1][1:]
2726                         filename_wo_ext = (
2727                             os.path.splitext(filename)[0]
2728                             if filename_real_ext in (old_ext, new_ext)
2729                             else filename)
2730                         return '%s.%s' % (filename_wo_ext, ext)
2731
2732                     # Ensure filename always has a correct extension for successful merge
2733                     full_filename = correct_ext(full_filename)
2734                     temp_filename = correct_ext(temp_filename)
2735                     dl_filename = existing_file(full_filename, temp_filename)
2736                     info_dict['__real_download'] = False
2737
2738                     _protocols = set(determine_protocol(f) for f in requested_formats)
2739                     if len(_protocols) == 1:  # All requested formats have same protocol
2740                         info_dict['protocol'] = _protocols.pop()
2741                     directly_mergable = FFmpegFD.can_merge_formats(info_dict, self.params)
2742                     if dl_filename is not None:
2743                         self.report_file_already_downloaded(dl_filename)
2744                     elif (directly_mergable and get_suitable_downloader(
2745                             info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
2746                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2747                         success, real_download = self.dl(temp_filename, info_dict)
2748                         info_dict['__real_download'] = real_download
2749                     else:
2750                         downloaded = []
2751                         merger = FFmpegMergerPP(self)
2752                         if self.params.get('allow_unplayable_formats'):
2753                             self.report_warning(
2754                                 'You have requested merging of multiple formats '
2755                                 'while also allowing unplayable formats to be downloaded. '
2756                                 'The formats won\'t be merged to prevent data corruption.')
2757                         elif not merger.available:
2758                             self.report_warning(
2759                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2760                                 'The formats won\'t be merged.')
2761
2762                         if temp_filename == '-':
2763                             reason = ('using a downloader other than ffmpeg' if directly_mergable
2764                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2765                                       else 'but ffmpeg is not installed')
2766                             self.report_warning(
2767                                 f'You have requested downloading multiple formats to stdout {reason}. '
2768                                 'The formats will be streamed one after the other')
2769                             fname = temp_filename
2770                         for f in requested_formats:
2771                             new_info = dict(info_dict)
2772                             del new_info['requested_formats']
2773                             new_info.update(f)
2774                             if temp_filename != '-':
2775                                 fname = prepend_extension(
2776                                     correct_ext(temp_filename, new_info['ext']),
2777                                     'f%s' % f['format_id'], new_info['ext'])
2778                                 if not self._ensure_dir_exists(fname):
2779                                     return
2780                                 f['filepath'] = fname
2781                                 downloaded.append(fname)
2782                             partial_success, real_download = self.dl(fname, new_info)
2783                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2784                             success = success and partial_success
2785                         if merger.available and not self.params.get('allow_unplayable_formats'):
2786                             info_dict['__postprocessors'].append(merger)
2787                             info_dict['__files_to_merge'] = downloaded
2788                             # Even if there were no downloads, it is being merged only now
2789                             info_dict['__real_download'] = True
2790                         else:
2791                             for file in downloaded:
2792                                 files_to_move[file] = None
2793                 else:
2794                     # Just a single file
2795                     dl_filename = existing_file(full_filename, temp_filename)
2796                     if dl_filename is None or dl_filename == temp_filename:
2797                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2798                         # So we should try to resume the download
2799                         success, real_download = self.dl(temp_filename, info_dict)
2800                         info_dict['__real_download'] = real_download
2801                     else:
2802                         self.report_file_already_downloaded(dl_filename)
2803
2804                 dl_filename = dl_filename or temp_filename
2805                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2806
2807             except network_exceptions as err:
2808                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2809                 return
2810             except (OSError, IOError) as err:
2811                 raise UnavailableVideoError(err)
2812             except (ContentTooShortError, ) as err:
2813                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2814                 return
2815
2816             if success and full_filename != '-':
2817
2818                 def fixup():
2819                     do_fixup = True
2820                     fixup_policy = self.params.get('fixup')
2821                     vid = info_dict['id']
2822
2823                     if fixup_policy in ('ignore', 'never'):
2824                         return
2825                     elif fixup_policy == 'warn':
2826                         do_fixup = False
2827                     elif fixup_policy != 'force':
2828                         assert fixup_policy in ('detect_or_warn', None)
2829                         if not info_dict.get('__real_download'):
2830                             do_fixup = False
2831
2832                     def ffmpeg_fixup(cndn, msg, cls):
2833                         if not cndn:
2834                             return
2835                         if not do_fixup:
2836                             self.report_warning(f'{vid}: {msg}')
2837                             return
2838                         pp = cls(self)
2839                         if pp.available:
2840                             info_dict['__postprocessors'].append(pp)
2841                         else:
2842                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2843
2844                     stretched_ratio = info_dict.get('stretched_ratio')
2845                     ffmpeg_fixup(
2846                         stretched_ratio not in (1, None),
2847                         f'Non-uniform pixel ratio {stretched_ratio}',
2848                         FFmpegFixupStretchedPP)
2849
2850                     ffmpeg_fixup(
2851                         (info_dict.get('requested_formats') is None
2852                          and info_dict.get('container') == 'm4a_dash'
2853                          and info_dict.get('ext') == 'm4a'),
2854                         'writing DASH m4a. Only some players support this container',
2855                         FFmpegFixupM4aPP)
2856
2857                     downloader = (get_suitable_downloader(info_dict, self.params).__name__
2858                                   if 'protocol' in info_dict else None)
2859                     ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2860                                  'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2861                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2862                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2863
2864                 fixup()
2865                 try:
2866                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2867                 except PostProcessingError as err:
2868                     self.report_error('Postprocessing: %s' % str(err))
2869                     return
2870                 try:
2871                     for ph in self._post_hooks:
2872                         ph(info_dict['filepath'])
2873                 except Exception as err:
2874                     self.report_error('post hooks: %s' % str(err))
2875                     return
2876                 must_record_download_archive = True
2877
2878         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2879             self.record_download_archive(info_dict)
2880         max_downloads = self.params.get('max_downloads')
2881         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2882             raise MaxDownloadsReached()
2883
2884     def download(self, url_list):
2885         """Download a given list of URLs."""
2886         outtmpl = self.outtmpl_dict['default']
2887         if (len(url_list) > 1
2888                 and outtmpl != '-'
2889                 and '%' not in outtmpl
2890                 and self.params.get('max_downloads') != 1):
2891             raise SameFileError(outtmpl)
2892
2893         for url in url_list:
2894             try:
2895                 # It also downloads the videos
2896                 res = self.extract_info(
2897                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2898             except UnavailableVideoError:
2899                 self.report_error('unable to download video')
2900             except MaxDownloadsReached:
2901                 self.to_screen('[info] Maximum number of downloads reached')
2902                 raise
2903             except ExistingVideoReached:
2904                 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
2905                 raise
2906             except RejectedVideoReached:
2907                 self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
2908                 raise
2909             else:
2910                 if self.params.get('dump_single_json', False):
2911                     self.post_extract(res)
2912                     self.to_stdout(json.dumps(self.sanitize_info(res)))
2913
2914         return self._download_retcode
2915
2916     def download_with_info_file(self, info_filename):
2917         with contextlib.closing(fileinput.FileInput(
2918                 [info_filename], mode='r',
2919                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2920             # FileInput doesn't have a read method, we can't call json.load
2921             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2922         try:
2923             self.process_ie_result(info, download=True)
2924         except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2925             webpage_url = info.get('webpage_url')
2926             if webpage_url is not None:
2927                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2928                 return self.download([webpage_url])
2929             else:
2930                 raise
2931         return self._download_retcode
2932
2933     @staticmethod
2934     def sanitize_info(info_dict, remove_private_keys=False):
2935         ''' Sanitize the infodict for converting to json '''
2936         if info_dict is None:
2937             return info_dict
2938         info_dict.setdefault('epoch', int(time.time()))
2939         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
2940         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
2941         if remove_private_keys:
2942             remove_keys |= {
2943                 'requested_formats', 'requested_subtitles', 'requested_entries',
2944                 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2945             }
2946             empty_values = (None, {}, [], set(), tuple())
2947             reject = lambda k, v: k not in keep_keys and (
2948                 k.startswith('_') or k in remove_keys or v in empty_values)
2949         else:
2950             reject = lambda k, v: k in remove_keys
2951         filter_fn = lambda obj: (
2952             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2953             else obj if not isinstance(obj, dict)
2954             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2955         return filter_fn(info_dict)
2956
2957     @staticmethod
2958     def filter_requested_info(info_dict, actually_filter=True):
2959         ''' Alias of sanitize_info for backward compatibility '''
2960         return YoutubeDL.sanitize_info(info_dict, actually_filter)
2961
2962     def run_pp(self, pp, infodict):
2963         files_to_delete = []
2964         if '__files_to_move' not in infodict:
2965             infodict['__files_to_move'] = {}
2966         try:
2967             files_to_delete, infodict = pp.run(infodict)
2968         except PostProcessingError as e:
2969             # Must be True and not 'only_download'
2970             if self.params.get('ignoreerrors') is True:
2971                 self.report_error(e)
2972                 return infodict
2973             raise
2974
2975         if not files_to_delete:
2976             return infodict
2977         if self.params.get('keepvideo', False):
2978             for f in files_to_delete:
2979                 infodict['__files_to_move'].setdefault(f, '')
2980         else:
2981             for old_filename in set(files_to_delete):
2982                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2983                 try:
2984                     os.remove(encodeFilename(old_filename))
2985                 except (IOError, OSError):
2986                     self.report_warning('Unable to remove downloaded original file')
2987                 if old_filename in infodict['__files_to_move']:
2988                     del infodict['__files_to_move'][old_filename]
2989         return infodict
2990
2991     @staticmethod
2992     def post_extract(info_dict):
2993         def actual_post_extract(info_dict):
2994             if info_dict.get('_type') in ('playlist', 'multi_video'):
2995                 for video_dict in info_dict.get('entries', {}):
2996                     actual_post_extract(video_dict or {})
2997                 return
2998
2999             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3000             extra = post_extractor().items()
3001             info_dict.update(extra)
3002             info_dict.pop('__post_extractor', None)
3003
3004             original_infodict = info_dict.get('__original_infodict') or {}
3005             original_infodict.update(extra)
3006             original_infodict.pop('__post_extractor', None)
3007
3008         actual_post_extract(info_dict or {})
3009
3010     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3011         info = dict(ie_info)
3012         info['__files_to_move'] = files_to_move or {}
3013         for pp in self._pps[key]:
3014             info = self.run_pp(pp, info)
3015         return info, info.pop('__files_to_move', None)
3016
3017     def post_process(self, filename, ie_info, files_to_move=None):
3018         """Run all the postprocessors on the given file."""
3019         info = dict(ie_info)
3020         info['filepath'] = filename
3021         info['__files_to_move'] = files_to_move or {}
3022
3023         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3024             info = self.run_pp(pp, info)
3025         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3026         del info['__files_to_move']
3027         for pp in self._pps['after_move']:
3028             info = self.run_pp(pp, info)
3029         return info
3030
3031     def _make_archive_id(self, info_dict):
3032         video_id = info_dict.get('id')
3033         if not video_id:
3034             return
3035         # Future-proof against any change in case
3036         # and backwards compatibility with prior versions
3037         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3038         if extractor is None:
3039             url = str_or_none(info_dict.get('url'))
3040             if not url:
3041                 return
3042             # Try to find matching extractor for the URL and take its ie_key
3043             for ie_key, ie in self._ies.items():
3044                 if ie.suitable(url):
3045                     extractor = ie_key
3046                     break
3047             else:
3048                 return
3049         return '%s %s' % (extractor.lower(), video_id)
3050
3051     def in_download_archive(self, info_dict):
3052         fn = self.params.get('download_archive')
3053         if fn is None:
3054             return False
3055
3056         vid_id = self._make_archive_id(info_dict)
3057         if not vid_id:
3058             return False  # Incomplete video information
3059
3060         return vid_id in self.archive
3061
3062     def record_download_archive(self, info_dict):
3063         fn = self.params.get('download_archive')
3064         if fn is None:
3065             return
3066         vid_id = self._make_archive_id(info_dict)
3067         assert vid_id
3068         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3069             archive_file.write(vid_id + '\n')
3070         self.archive.add(vid_id)
3071
3072     @staticmethod
3073     def format_resolution(format, default='unknown'):
3074         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3075             return 'audio only'
3076         if format.get('resolution') is not None:
3077             return format['resolution']
3078         if format.get('width') and format.get('height'):
3079             res = '%dx%d' % (format['width'], format['height'])
3080         elif format.get('height'):
3081             res = '%sp' % format['height']
3082         elif format.get('width'):
3083             res = '%dx?' % format['width']
3084         else:
3085             res = default
3086         if format.get('vcodec') == 'none' and format.get('acodec') == 'none':
3087             res += ' (images)'
3088         return res
3089
3090     def _format_note(self, fdict):
3091         res = ''
3092         if fdict.get('ext') in ['f4f', 'f4m']:
3093             res += '(unsupported) '
3094         if fdict.get('language'):
3095             if res:
3096                 res += ' '
3097             res += '[%s] ' % fdict['language']
3098         if fdict.get('format_note') is not None:
3099             res += fdict['format_note'] + ' '
3100         if fdict.get('tbr') is not None:
3101             res += '%4dk ' % fdict['tbr']
3102         if fdict.get('container') is not None:
3103             if res:
3104                 res += ', '
3105             res += '%s container' % fdict['container']
3106         if (fdict.get('vcodec') is not None
3107                 and fdict.get('vcodec') != 'none'):
3108             if res:
3109                 res += ', '
3110             res += fdict['vcodec']
3111             if fdict.get('vbr') is not None:
3112                 res += '@'
3113         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3114             res += 'video@'
3115         if fdict.get('vbr') is not None:
3116             res += '%4dk' % fdict['vbr']
3117         if fdict.get('fps') is not None:
3118             if res:
3119                 res += ', '
3120             res += '%sfps' % fdict['fps']
3121         if fdict.get('acodec') is not None:
3122             if res:
3123                 res += ', '
3124             if fdict['acodec'] == 'none':
3125                 res += 'video only'
3126             else:
3127                 res += '%-5s' % fdict['acodec']
3128         elif fdict.get('abr') is not None:
3129             if res:
3130                 res += ', '
3131             res += 'audio'
3132         if fdict.get('abr') is not None:
3133             res += '@%3dk' % fdict['abr']
3134         if fdict.get('asr') is not None:
3135             res += ' (%5dHz)' % fdict['asr']
3136         if fdict.get('filesize') is not None:
3137             if res:
3138                 res += ', '
3139             res += format_bytes(fdict['filesize'])
3140         elif fdict.get('filesize_approx') is not None:
3141             if res:
3142                 res += ', '
3143             res += '~' + format_bytes(fdict['filesize_approx'])
3144         return res
3145
3146     def list_formats(self, info_dict):
3147         formats = info_dict.get('formats', [info_dict])
3148         new_format = (
3149             'list-formats' not in self.params.get('compat_opts', [])
3150             and self.params.get('listformats_table', True) is not False)
3151         if new_format:
3152             table = [
3153                 [
3154                     format_field(f, 'format_id'),
3155                     format_field(f, 'ext'),
3156                     self.format_resolution(f),
3157                     format_field(f, 'fps', '%d'),
3158                     '|',
3159                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3160                     format_field(f, 'tbr', '%4dk'),
3161                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3162                     '|',
3163                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3164                     format_field(f, 'vbr', '%4dk'),
3165                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3166                     format_field(f, 'abr', '%3dk'),
3167                     format_field(f, 'asr', '%5dHz'),
3168                     ', '.join(filter(None, (
3169                         'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3170                         format_field(f, 'language', '[%s]'),
3171                         format_field(f, 'format_note'),
3172                         format_field(f, 'container', ignore=(None, f.get('ext'))),
3173                     ))),
3174                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3175             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
3176                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
3177         else:
3178             table = [
3179                 [
3180                     format_field(f, 'format_id'),
3181                     format_field(f, 'ext'),
3182                     self.format_resolution(f),
3183                     self._format_note(f)]
3184                 for f in formats
3185                 if f.get('preference') is None or f['preference'] >= -1000]
3186             header_line = ['format code', 'extension', 'resolution', 'note']
3187
3188         self.to_screen(
3189             '[info] Available formats for %s:' % info_dict['id'])
3190         self.to_stdout(render_table(
3191             header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3192
3193     def list_thumbnails(self, info_dict):
3194         thumbnails = list(info_dict.get('thumbnails'))
3195         if not thumbnails:
3196             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3197             return
3198
3199         self.to_screen(
3200             '[info] Thumbnails for %s:' % info_dict['id'])
3201         self.to_stdout(render_table(
3202             ['ID', 'width', 'height', 'URL'],
3203             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3204
3205     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3206         if not subtitles:
3207             self.to_screen('%s has no %s' % (video_id, name))
3208             return
3209         self.to_screen(
3210             'Available %s for %s:' % (name, video_id))
3211
3212         def _row(lang, formats):
3213             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3214             if len(set(names)) == 1:
3215                 names = [] if names[0] == 'unknown' else names[:1]
3216             return [lang, ', '.join(names), ', '.join(exts)]
3217
3218         self.to_stdout(render_table(
3219             ['Language', 'Name', 'Formats'],
3220             [_row(lang, formats) for lang, formats in subtitles.items()],
3221             hideEmpty=True))
3222
3223     def urlopen(self, req):
3224         """ Start an HTTP download """
3225         if isinstance(req, compat_basestring):
3226             req = sanitized_Request(req)
3227         return self._opener.open(req, timeout=self._socket_timeout)
3228
3229     def print_debug_header(self):
3230         if not self.params.get('verbose'):
3231             return
3232
3233         stdout_encoding = getattr(
3234             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
3235         encoding_str = (
3236             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3237                 locale.getpreferredencoding(),
3238                 sys.getfilesystemencoding(),
3239                 stdout_encoding,
3240                 self.get_encoding()))
3241         write_string(encoding_str, encoding=None)
3242
3243         source = detect_variant()
3244         self._write_string('[debug] yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
3245         if _LAZY_LOADER:
3246             self._write_string('[debug] Lazy loading extractors enabled\n')
3247         if plugin_extractors or plugin_postprocessors:
3248             self._write_string('[debug] Plugins: %s\n' % [
3249                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3250                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3251         if self.params.get('compat_opts'):
3252             self._write_string(
3253                 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3254         try:
3255             sp = subprocess.Popen(
3256                 ['git', 'rev-parse', '--short', 'HEAD'],
3257                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3258                 cwd=os.path.dirname(os.path.abspath(__file__)))
3259             out, err = process_communicate_or_kill(sp)
3260             out = out.decode().strip()
3261             if re.match('[0-9a-f]+', out):
3262                 self._write_string('[debug] Git HEAD: %s\n' % out)
3263         except Exception:
3264             try:
3265                 sys.exc_clear()
3266             except Exception:
3267                 pass
3268
3269         def python_implementation():
3270             impl_name = platform.python_implementation()
3271             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3272                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3273             return impl_name
3274
3275         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3276             platform.python_version(),
3277             python_implementation(),
3278             platform.architecture()[0],
3279             platform_name()))
3280
3281         exe_versions = FFmpegPostProcessor.get_versions(self)
3282         exe_versions['rtmpdump'] = rtmpdump_version()
3283         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3284         exe_str = ', '.join(
3285             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3286         ) or 'none'
3287         self._write_string('[debug] exe versions: %s\n' % exe_str)
3288
3289         from .downloader.websocket import has_websockets
3290         from .postprocessor.embedthumbnail import has_mutagen
3291         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3292
3293         lib_str = ', '.join(sorted(filter(None, (
3294             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3295             has_websockets and 'websockets',
3296             has_mutagen and 'mutagen',
3297             SQLITE_AVAILABLE and 'sqlite',
3298             KEYRING_AVAILABLE and 'keyring',
3299         )))) or 'none'
3300         self._write_string('[debug] Optional libraries: %s\n' % lib_str)
3301
3302         proxy_map = {}
3303         for handler in self._opener.handlers:
3304             if hasattr(handler, 'proxies'):
3305                 proxy_map.update(handler.proxies)
3306         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
3307
3308         if self.params.get('call_home', False):
3309             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3310             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
3311             return
3312             latest_version = self.urlopen(
3313                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3314             if version_tuple(latest_version) > version_tuple(__version__):
3315                 self.report_warning(
3316                     'You are using an outdated version (newest version: %s)! '
3317                     'See https://yt-dl.org/update if you need help updating.' %
3318                     latest_version)
3319
3320     def _setup_opener(self):
3321         timeout_val = self.params.get('socket_timeout')
3322         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3323
3324         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3325         opts_cookiefile = self.params.get('cookiefile')
3326         opts_proxy = self.params.get('proxy')
3327
3328         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3329
3330         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3331         if opts_proxy is not None:
3332             if opts_proxy == '':
3333                 proxies = {}
3334             else:
3335                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3336         else:
3337             proxies = compat_urllib_request.getproxies()
3338             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3339             if 'http' in proxies and 'https' not in proxies:
3340                 proxies['https'] = proxies['http']
3341         proxy_handler = PerRequestProxyHandler(proxies)
3342
3343         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3344         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3345         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3346         redirect_handler = YoutubeDLRedirectHandler()
3347         data_handler = compat_urllib_request_DataHandler()
3348
3349         # When passing our own FileHandler instance, build_opener won't add the
3350         # default FileHandler and allows us to disable the file protocol, which
3351         # can be used for malicious purposes (see
3352         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3353         file_handler = compat_urllib_request.FileHandler()
3354
3355         def file_open(*args, **kwargs):
3356             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3357         file_handler.file_open = file_open
3358
3359         opener = compat_urllib_request.build_opener(
3360             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3361
3362         # Delete the default user-agent header, which would otherwise apply in
3363         # cases where our custom HTTP handler doesn't come into play
3364         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3365         opener.addheaders = []
3366         self._opener = opener
3367
3368     def encode(self, s):
3369         if isinstance(s, bytes):
3370             return s  # Already encoded
3371
3372         try:
3373             return s.encode(self.get_encoding())
3374         except UnicodeEncodeError as err:
3375             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3376             raise
3377
3378     def get_encoding(self):
3379         encoding = self.params.get('encoding')
3380         if encoding is None:
3381             encoding = preferredencoding()
3382         return encoding
3383
3384     def _write_info_json(self, label, ie_result, infofn):
3385         ''' Write infojson and returns True = written, False = skip, None = error '''
3386         if not self.params.get('writeinfojson'):
3387             return False
3388         elif not infofn:
3389             self.write_debug(f'Skipping writing {label} infojson')
3390             return False
3391         elif not self._ensure_dir_exists(infofn):
3392             return None
3393         elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3394             self.to_screen(f'[info] {label.title()} metadata is already present')
3395         else:
3396             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3397             try:
3398                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3399             except (OSError, IOError):
3400                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3401                 return None
3402         return True
3403
3404     def _write_description(self, label, ie_result, descfn):
3405         ''' Write description and returns True = written, False = skip, None = error '''
3406         if not self.params.get('writedescription'):
3407             return False
3408         elif not descfn:
3409             self.write_debug(f'Skipping writing {label} description')
3410             return False
3411         elif not self._ensure_dir_exists(descfn):
3412             return None
3413         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3414             self.to_screen(f'[info] {label.title()} description is already present')
3415         elif ie_result.get('description') is None:
3416             self.report_warning(f'There\'s no {label} description to write')
3417             return False
3418         else:
3419             try:
3420                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3421                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3422                     descfile.write(ie_result['description'])
3423             except (OSError, IOError):
3424                 self.report_error(f'Cannot write {label} description file {descfn}')
3425                 return None
3426         return True
3427
3428     def _write_subtitles(self, info_dict, filename):
3429         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3430         ret = []
3431         subtitles = info_dict.get('requested_subtitles')
3432         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3433             # subtitles download errors are already managed as troubles in relevant IE
3434             # that way it will silently go on when used with unsupporting IE
3435             return ret
3436
3437         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3438         if not sub_filename_base:
3439             self.to_screen('[info] Skipping writing video subtitles')
3440             return ret
3441         for sub_lang, sub_info in subtitles.items():
3442             sub_format = sub_info['ext']
3443             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3444             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3445             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3446                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3447                 sub_info['filepath'] = sub_filename
3448                 ret.append((sub_filename, sub_filename_final))
3449                 continue
3450
3451             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3452             if sub_info.get('data') is not None:
3453                 try:
3454                     # Use newline='' to prevent conversion of newline characters
3455                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3456                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3457                         subfile.write(sub_info['data'])
3458                     sub_info['filepath'] = sub_filename
3459                     ret.append((sub_filename, sub_filename_final))
3460                     continue
3461                 except (OSError, IOError):
3462                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3463                     return None
3464
3465             try:
3466                 sub_copy = sub_info.copy()
3467                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3468                 self.dl(sub_filename, sub_copy, subtitle=True)
3469                 sub_info['filepath'] = sub_filename
3470                 ret.append((sub_filename, sub_filename_final))
3471             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3472                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3473                 continue
3474         return ret
3475
3476     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3477         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3478         write_all = self.params.get('write_all_thumbnails', False)
3479         thumbnails, ret = [], []
3480         if write_all or self.params.get('writethumbnail', False):
3481             thumbnails = info_dict.get('thumbnails') or []
3482         multiple = write_all and len(thumbnails) > 1
3483
3484         if thumb_filename_base is None:
3485             thumb_filename_base = filename
3486         if thumbnails and not thumb_filename_base:
3487             self.write_debug(f'Skipping writing {label} thumbnail')
3488             return ret
3489
3490         for t in thumbnails[::-1]:
3491             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3492             thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3493             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3494             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3495
3496             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3497                 ret.append((thumb_filename, thumb_filename_final))
3498                 t['filepath'] = thumb_filename
3499                 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
3500             else:
3501                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3502                 try:
3503                     uf = self.urlopen(t['url'])
3504                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3505                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3506                         shutil.copyfileobj(uf, thumbf)
3507                     ret.append((thumb_filename, thumb_filename_final))
3508                     t['filepath'] = thumb_filename
3509                 except network_exceptions as err:
3510                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3511             if ret and not write_all:
3512                 break
3513         return ret