yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import time
  24 import tokenize
  25 import traceback
  26 import random
  27
  28 from string import ascii_letters
  29 from zipimport import zipimporter
  30
  31 from .compat import (
  32     compat_basestring,
  33     compat_cookiejar,
  34     compat_get_terminal_size,
  35     compat_kwargs,
  36     compat_numeric_types,
  37     compat_os_name,
  38     compat_str,
  39     compat_tokenize_tokenize,
  40     compat_urllib_error,
  41     compat_urllib_request,
  42     compat_urllib_request_DataHandler,
  43 )
  44 from .utils import (
  45     age_restricted,
  46     args_to_str,
  47     ContentTooShortError,
  48     date_from_str,
  49     DateRange,
  50     DEFAULT_OUTTMPL,
  51     determine_ext,
  52     determine_protocol,
  53     DOT_DESKTOP_LINK_TEMPLATE,
  54     DOT_URL_LINK_TEMPLATE,
  55     DOT_WEBLOC_LINK_TEMPLATE,
  56     DownloadError,
  57     encode_compat_str,
  58     encodeFilename,
  59     EntryNotInPlaylist,
  60     error_to_compat_str,
  61     ExistingVideoReached,
  62     expand_path,
  63     ExtractorError,
  64     float_or_none,
  65     format_bytes,
  66     format_field,
  67     STR_FORMAT_RE,
  68     formatSeconds,
  69     GeoRestrictedError,
  70     int_or_none,
  71     iri_to_uri,
  72     ISO3166Utils,
  73     LazyList,
  74     locked_file,
  75     make_dir,
  76     make_HTTPS_handler,
  77     MaxDownloadsReached,
  78     network_exceptions,
  79     orderedSet,
  80     OUTTMPL_TYPES,
  81     PagedList,
  82     parse_filesize,
  83     PerRequestProxyHandler,
  84     platform_name,
  85     PostProcessingError,
  86     preferredencoding,
  87     prepend_extension,
  88     process_communicate_or_kill,
  89     random_uuidv4,
  90     register_socks_protocols,
  91     RejectedVideoReached,
  92     render_table,
  93     replace_extension,
  94     SameFileError,
  95     sanitize_filename,
  96     sanitize_path,
  97     sanitize_url,
  98     sanitized_Request,
  99     std_headers,
 100     str_or_none,
 101     strftime_or_none,
 102     subtitles_filename,
 103     to_high_limit_path,
 104     traverse_obj,
 105     UnavailableVideoError,
 106     url_basename,
 107     version_tuple,
 108     write_json_file,
 109     write_string,
 110     YoutubeDLCookieJar,
 111     YoutubeDLCookieProcessor,
 112     YoutubeDLHandler,
 113     YoutubeDLRedirectHandler,
 114 )
 115 from .cache import Cache
 116 from .extractor import (
 117     gen_extractor_classes,
 118     get_info_extractor,
 119     _LAZY_LOADER,
 120     _PLUGIN_CLASSES
 121 )
 122 from .extractor.openload import PhantomJSwrapper
 123 from .downloader import (
 124     get_suitable_downloader,
 125     shorten_protocol_name
 126 )
 127 from .downloader.rtmp import rtmpdump_version
 128 from .postprocessor import (
 129     FFmpegFixupM3u8PP,
 130     FFmpegFixupM4aPP,
 131     FFmpegFixupStretchedPP,
 132     FFmpegMergerPP,
 133     FFmpegPostProcessor,
 134     # FFmpegSubtitlesConvertorPP,
 135     get_postprocessor,
 136     MoveFilesAfterDownloadPP,
 137 )
 138 from .version import __version__
 139
 140 if compat_os_name == 'nt':
 141     import ctypes
 142
 143
 144 class YoutubeDL(object):
 145     """YoutubeDL class.
 146
 147     YoutubeDL objects are the ones responsible of downloading the
 148     actual video file and writing it to disk if the user has requested
 149     it, among some other tasks. In most cases there should be one per
 150     program. As, given a video URL, the downloader doesn't know how to
 151     extract all the needed information, task that InfoExtractors do, it
 152     has to pass the URL to one of them.
 153
 154     For this, YoutubeDL objects have a method that allows
 155     InfoExtractors to be registered in a given order. When it is passed
 156     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 157     finds that reports being able to handle it. The InfoExtractor extracts
 158     all the information about the video or videos the URL refers to, and
 159     YoutubeDL process the extracted information, possibly using a File
 160     Downloader to download the video.
 161
 162     YoutubeDL objects accept a lot of parameters. In order not to saturate
 163     the object constructor with arguments, it receives a dictionary of
 164     options instead. These options are available through the params
 165     attribute for the InfoExtractors to use. The YoutubeDL also
 166     registers itself as the downloader in charge for the InfoExtractors
 167     that are added to it, so this is a "mutual registration".
 168
 169     Available options:
 170
 171     username:          Username for authentication purposes.
 172     password:          Password for authentication purposes.
 173     videopassword:     Password for accessing a video.
 174     ap_mso:            Adobe Pass multiple-system operator identifier.
 175     ap_username:       Multiple-system operator account username.
 176     ap_password:       Multiple-system operator account password.
 177     usenetrc:          Use netrc for authentication instead.
 178     verbose:           Print additional info to stdout.
 179     quiet:             Do not print messages to stdout.
 180     no_warnings:       Do not print out anything for warnings.
 181     forceprint:        A list of templates to force print
 182     forceurl:          Force printing final URL. (Deprecated)
 183     forcetitle:        Force printing title. (Deprecated)
 184     forceid:           Force printing ID. (Deprecated)
 185     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 186     forcedescription:  Force printing description. (Deprecated)
 187     forcefilename:     Force printing final filename. (Deprecated)
 188     forceduration:     Force printing duration. (Deprecated)
 189     forcejson:         Force printing info_dict as JSON.
 190     dump_single_json:  Force printing the info_dict of the whole playlist
 191                        (or video) as a single JSON line.
 192     force_write_download_archive: Force writing download archive regardless
 193                        of 'skip_download' or 'simulate'.
 194     simulate:          Do not download the video files.
 195     format:            Video format code. see "FORMAT SELECTION" for more details.
 196     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 197     ignore_no_formats_error: Ignore "No video formats" error. Usefull for
 198                        extracting metadata even if the video is not actually
 199                        available for download (experimental)
 200     format_sort:       How to sort the video formats. see "Sorting Formats"
 201                        for more details.
 202     format_sort_force: Force the given format_sort. see "Sorting Formats"
 203                        for more details.
 204     allow_multiple_video_streams:   Allow multiple video streams to be merged
 205                        into a single file
 206     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 207                        into a single file
 208     paths:             Dictionary of output paths. The allowed keys are 'home'
 209                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 210     outtmpl:           Dictionary of templates for output names. Allowed keys
 211                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 212                        A string a also accepted for backward compatibility
 213     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 214     restrictfilenames: Do not allow "&" and spaces in file names
 215     trim_file_name:    Limit length of filename (extension excluded)
 216     windowsfilenames:  Force the filenames to be windows compatible
 217     ignoreerrors:      Do not stop on download errors
 218                        (Default True when running yt-dlp,
 219                        but False when directly accessing YoutubeDL class)
 220     skip_playlist_after_errors: Number of allowed failures until the rest of
 221                        the playlist is skipped
 222     force_generic_extractor: Force downloader to use the generic extractor
 223     overwrites:        Overwrite all video and metadata files if True,
 224                        overwrite only non-video files if None
 225                        and don't overwrite any file if False
 226     playliststart:     Playlist item to start at.
 227     playlistend:       Playlist item to end at.
 228     playlist_items:    Specific indices of playlist to download.
 229     playlistreverse:   Download playlist items in reverse order.
 230     playlistrandom:    Download playlist items in random order.
 231     matchtitle:        Download only matching titles.
 232     rejecttitle:       Reject downloads for matching titles.
 233     logger:            Log messages to a logging.Logger instance.
 234     logtostderr:       Log messages to stderr instead of stdout.
 235     writedescription:  Write the video description to a .description file
 236     writeinfojson:     Write the video description to a .info.json file
 237     clean_infojson:    Remove private fields from the infojson
 238     writecomments:     Extract video comments. This will not be written to disk
 239                        unless writeinfojson is also given
 240     writeannotations:  Write the video annotations to a .annotations.xml file
 241     writethumbnail:    Write the thumbnail image to a file
 242     allow_playlist_files: Whether to write playlists' description, infojson etc
 243                        also to disk when using the 'write*' options
 244     write_all_thumbnails:  Write all thumbnail formats to files
 245     writelink:         Write an internet shortcut file, depending on the
 246                        current platform (.url/.webloc/.desktop)
 247     writeurllink:      Write a Windows internet shortcut file (.url)
 248     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 249     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 250     writesubtitles:    Write the video subtitles to a file
 251     writeautomaticsub: Write the automatically generated subtitles to a file
 252     allsubtitles:      Deprecated - Use subtitlelangs = ['all']
 253                        Downloads all the subtitles of the video
 254                        (requires writesubtitles or writeautomaticsub)
 255     listsubtitles:     Lists all available subtitles for the video
 256     subtitlesformat:   The format code for subtitles
 257     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 258                        The list may contain "all" to refer to all the available
 259                        subtitles. The language can be prefixed with a "-" to
 260                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 261     keepvideo:         Keep the video file after post-processing
 262     daterange:         A DateRange object, download only if the upload_date is in the range.
 263     skip_download:     Skip the actual download of the video file
 264     cachedir:          Location of the cache files in the filesystem.
 265                        False to disable filesystem cache.
 266     noplaylist:        Download single video instead of a playlist if in doubt.
 267     age_limit:         An integer representing the user's age in years.
 268                        Unsuitable videos for the given age are skipped.
 269     min_views:         An integer representing the minimum view count the video
 270                        must have in order to not be skipped.
 271                        Videos without view count information are always
 272                        downloaded. None for no limit.
 273     max_views:         An integer representing the maximum view count.
 274                        Videos that are more popular than that are not
 275                        downloaded.
 276                        Videos without view count information are always
 277                        downloaded. None for no limit.
 278     download_archive:  File name of a file where all downloads are recorded.
 279                        Videos already present in the file are not downloaded
 280                        again.
 281     break_on_existing: Stop the download process after attempting to download a
 282                        file that is in the archive.
 283     break_on_reject:   Stop the download process when encountering a video that
 284                        has been filtered out.
 285     cookiefile:        File name where cookies should be read from and dumped to
 286     nocheckcertificate:Do not verify SSL certificates
 287     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 288                        At the moment, this is only supported by YouTube.
 289     proxy:             URL of the proxy server to use
 290     geo_verification_proxy:  URL of the proxy to use for IP address verification
 291                        on geo-restricted sites.
 292     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 293     bidi_workaround:   Work around buggy terminals without bidirectional text
 294                        support, using fridibi
 295     debug_printtraffic:Print out sent and received HTTP traffic
 296     include_ads:       Download ads as well
 297     default_search:    Prepend this string if an input url is not valid.
 298                        'auto' for elaborate guessing
 299     encoding:          Use this encoding instead of the system-specified.
 300     extract_flat:      Do not resolve URLs, return the immediate result.
 301                        Pass in 'in_playlist' to only show this behavior for
 302                        playlist items.
 303     postprocessors:    A list of dictionaries, each with an entry
 304                        * key:  The name of the postprocessor. See
 305                                yt_dlp/postprocessor/__init__.py for a list.
 306                        * when: When to run the postprocessor. Can be one of
 307                                pre_process|before_dl|post_process|after_move.
 308                                Assumed to be 'post_process' if not given
 309     post_hooks:        A list of functions that get called as the final step
 310                        for each video file, after all postprocessors have been
 311                        called. The filename will be passed as the only argument.
 312     progress_hooks:    A list of functions that get called on download
 313                        progress, with a dictionary with the entries
 314                        * status: One of "downloading", "error", or "finished".
 315                                  Check this first and ignore unknown values.
 316
 317                        If status is one of "downloading", or "finished", the
 318                        following properties may also be present:
 319                        * filename: The final filename (always present)
 320                        * tmpfilename: The filename we're currently writing to
 321                        * downloaded_bytes: Bytes on disk
 322                        * total_bytes: Size of the whole file, None if unknown
 323                        * total_bytes_estimate: Guess of the eventual file size,
 324                                                None if unavailable.
 325                        * elapsed: The number of seconds since download started.
 326                        * eta: The estimated time in seconds, None if unknown
 327                        * speed: The download speed in bytes/second, None if
 328                                 unknown
 329                        * fragment_index: The counter of the currently
 330                                          downloaded video fragment.
 331                        * fragment_count: The number of fragments (= individual
 332                                          files that will be merged)
 333
 334                        Progress hooks are guaranteed to be called at least once
 335                        (with status "finished") if the download is successful.
 336     merge_output_format: Extension to use when merging formats.
 337     final_ext:         Expected final extension; used to detect when the file was
 338                        already downloaded and converted. "merge_output_format" is
 339                        replaced by this extension when given
 340     fixup:             Automatically correct known faults of the file.
 341                        One of:
 342                        - "never": do nothing
 343                        - "warn": only emit a warning
 344                        - "detect_or_warn": check whether we can do anything
 345                                            about it, warn otherwise (default)
 346     source_address:    Client-side IP address to bind to.
 347     call_home:         Boolean, true iff we are allowed to contact the
 348                        yt-dlp servers for debugging. (BROKEN)
 349     sleep_interval_requests: Number of seconds to sleep between requests
 350                        during extraction
 351     sleep_interval:    Number of seconds to sleep before each download when
 352                        used alone or a lower bound of a range for randomized
 353                        sleep before each download (minimum possible number
 354                        of seconds to sleep) when used along with
 355                        max_sleep_interval.
 356     max_sleep_interval:Upper bound of a range for randomized sleep before each
 357                        download (maximum possible number of seconds to sleep).
 358                        Must only be used along with sleep_interval.
 359                        Actual sleep time will be a random float from range
 360                        [sleep_interval; max_sleep_interval].
 361     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 362     listformats:       Print an overview of available video formats and exit.
 363     list_thumbnails:   Print a table of all thumbnails and exit.
 364     match_filter:      A function that gets called with the info_dict of
 365                        every video.
 366                        If it returns a message, the video is ignored.
 367                        If it returns None, the video is downloaded.
 368                        match_filter_func in utils.py is one example for this.
 369     no_color:          Do not emit color codes in output.
 370     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 371                        HTTP header
 372     geo_bypass_country:
 373                        Two-letter ISO 3166-2 country code that will be used for
 374                        explicit geographic restriction bypassing via faking
 375                        X-Forwarded-For HTTP header
 376     geo_bypass_ip_block:
 377                        IP range in CIDR notation that will be used similarly to
 378                        geo_bypass_country
 379
 380     The following options determine which downloader is picked:
 381     external_downloader: A dictionary of protocol keys and the executable of the
 382                        external downloader to use for it. The allowed protocols
 383                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 384                        Set the value to 'native' to use the native downloader
 385     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 386                        or {'m3u8': 'ffmpeg'} instead.
 387                        Use the native HLS downloader instead of ffmpeg/avconv
 388                        if True, otherwise use ffmpeg/avconv if False, otherwise
 389                        use downloader suggested by extractor if None.
 390     compat_opts:       Compatibility options. See "Differences in default behavior".
 391                        Note that only format-sort, format-spec, no-live-chat,
 392                        no-attach-info-json, playlist-index, list-formats,
 393                        no-direct-merge, no-youtube-channel-redirect,
 394                        and no-youtube-unavailable-videos works when used via the API
 395
 396     The following parameters are not used by YoutubeDL itself, they are used by
 397     the downloader (see yt_dlp/downloader/common.py):
 398     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 399     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 400     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 401     http_chunk_size.
 402
 403     The following options are used by the post processors:
 404     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 405                        otherwise prefer ffmpeg. (avconv support is deprecated)
 406     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 407                        to the binary or its containing directory.
 408     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 409                         and a list of additional command-line arguments for the
 410                         postprocessor/executable. The dict can also have "PP+EXE" keys
 411                         which are used when the given exe is used by the given PP.
 412                         Use 'default' as the name for arguments to passed to all PP
 413
 414     The following options are used by the extractors:
 415     extractor_retries: Number of times to retry for known errors
 416     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 417     hls_split_discontinuity: Split HLS playlists to different formats at
 418                        discontinuities such as ad breaks (default: False)
 419     youtube_include_dash_manifest: If True (default), DASH manifests and related
 420                        data will be downloaded and processed by extractor.
 421                        You can reduce network I/O by disabling it if you don't
 422                        care about DASH. (only for youtube)
 423     youtube_include_hls_manifest: If True (default), HLS manifests and related
 424                        data will be downloaded and processed by extractor.
 425                        You can reduce network I/O by disabling it if you don't
 426                        care about HLS. (only for youtube)
 427     """
 428
 429     _NUMERIC_FIELDS = set((
 430         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 431         'timestamp', 'upload_year', 'upload_month', 'upload_day',
 432         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 433         'average_rating', 'comment_count', 'age_limit',
 434         'start_time', 'end_time',
 435         'chapter_number', 'season_number', 'episode_number',
 436         'track_number', 'disc_number', 'release_year',
 437         'playlist_index',
 438     ))
 439
 440     params = None
 441     _ies = []
 442     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 443     __prepare_filename_warned = False
 444     _first_webpage_request = True
 445     _download_retcode = None
 446     _num_downloads = None
 447     _playlist_level = 0
 448     _playlist_urls = set()
 449     _screen_file = None
 450
 451     def __init__(self, params=None, auto_init=True):
 452         """Create a FileDownloader object with the given options."""
 453         if params is None:
 454             params = {}
 455         self._ies = []
 456         self._ies_instances = {}
 457         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 458         self.__prepare_filename_warned = False
 459         self._first_webpage_request = True
 460         self._post_hooks = []
 461         self._progress_hooks = []
 462         self._download_retcode = 0
 463         self._num_downloads = 0
 464         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 465         self._err_file = sys.stderr
 466         self.params = {
 467             # Default parameters
 468             'nocheckcertificate': False,
 469         }
 470         self.params.update(params)
 471         self.cache = Cache(self)
 472
 473         if sys.version_info < (3, 6):
 474             self.report_warning(
 475                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 476
 477         def check_deprecated(param, option, suggestion):
 478             if self.params.get(param) is not None:
 479                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 480                 return True
 481             return False
 482
 483         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 484             if self.params.get('geo_verification_proxy') is None:
 485                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 486
 487         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 488         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 489         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 490
 491         for msg in self.params.get('warnings', []):
 492             self.report_warning(msg)
 493
 494         if self.params.get('final_ext'):
 495             if self.params.get('merge_output_format'):
 496                 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
 497             self.params['merge_output_format'] = self.params['final_ext']
 498
 499         if 'overwrites' in self.params and self.params['overwrites'] is None:
 500             del self.params['overwrites']
 501
 502         if params.get('bidi_workaround', False):
 503             try:
 504                 import pty
 505                 master, slave = pty.openpty()
 506                 width = compat_get_terminal_size().columns
 507                 if width is None:
 508                     width_args = []
 509                 else:
 510                     width_args = ['-w', str(width)]
 511                 sp_kwargs = dict(
 512                     stdin=subprocess.PIPE,
 513                     stdout=slave,
 514                     stderr=self._err_file)
 515                 try:
 516                     self._output_process = subprocess.Popen(
 517                         ['bidiv'] + width_args, **sp_kwargs
 518                     )
 519                 except OSError:
 520                     self._output_process = subprocess.Popen(
 521                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 522                 self._output_channel = os.fdopen(master, 'rb')
 523             except OSError as ose:
 524                 if ose.errno == errno.ENOENT:
 525                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 526                 else:
 527                     raise
 528
 529         if (sys.platform != 'win32'
 530                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 531                 and not params.get('restrictfilenames', False)):
 532             # Unicode filesystem API will throw errors (#1474, #13027)
 533             self.report_warning(
 534                 'Assuming --restrict-filenames since file system encoding '
 535                 'cannot encode all characters. '
 536                 'Set the LC_ALL environment variable to fix this.')
 537             self.params['restrictfilenames'] = True
 538
 539         self.outtmpl_dict = self.parse_outtmpl()
 540
 541         self._setup_opener()
 542
 543         """Preload the archive, if any is specified"""
 544         def preload_download_archive(fn):
 545             if fn is None:
 546                 return False
 547             self.write_debug('Loading archive file %r\n' % fn)
 548             try:
 549                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 550                     for line in archive_file:
 551                         self.archive.add(line.strip())
 552             except IOError as ioe:
 553                 if ioe.errno != errno.ENOENT:
 554                     raise
 555                 return False
 556             return True
 557
 558         self.archive = set()
 559         preload_download_archive(self.params.get('download_archive'))
 560
 561         if auto_init:
 562             self.print_debug_header()
 563             self.add_default_info_extractors()
 564
 565         for pp_def_raw in self.params.get('postprocessors', []):
 566             pp_class = get_postprocessor(pp_def_raw['key'])
 567             pp_def = dict(pp_def_raw)
 568             del pp_def['key']
 569             if 'when' in pp_def:
 570                 when = pp_def['when']
 571                 del pp_def['when']
 572             else:
 573                 when = 'post_process'
 574             pp = pp_class(self, **compat_kwargs(pp_def))
 575             self.add_post_processor(pp, when=when)
 576
 577         for ph in self.params.get('post_hooks', []):
 578             self.add_post_hook(ph)
 579
 580         for ph in self.params.get('progress_hooks', []):
 581             self.add_progress_hook(ph)
 582
 583         register_socks_protocols()
 584
 585     def warn_if_short_id(self, argv):
 586         # short YouTube ID starting with dash?
 587         idxs = [
 588             i for i, a in enumerate(argv)
 589             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 590         if idxs:
 591             correct_argv = (
 592                 ['yt-dlp']
 593                 + [a for i, a in enumerate(argv) if i not in idxs]
 594                 + ['--'] + [argv[i] for i in idxs]
 595             )
 596             self.report_warning(
 597                 'Long argument string detected. '
 598                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 599                 args_to_str(correct_argv))
 600
 601     def add_info_extractor(self, ie):
 602         """Add an InfoExtractor object to the end of the list."""
 603         self._ies.append(ie)
 604         if not isinstance(ie, type):
 605             self._ies_instances[ie.ie_key()] = ie
 606             ie.set_downloader(self)
 607
 608     def get_info_extractor(self, ie_key):
 609         """
 610         Get an instance of an IE with name ie_key, it will try to get one from
 611         the _ies list, if there's no instance it will create a new one and add
 612         it to the extractor list.
 613         """
 614         ie = self._ies_instances.get(ie_key)
 615         if ie is None:
 616             ie = get_info_extractor(ie_key)()
 617             self.add_info_extractor(ie)
 618         return ie
 619
 620     def add_default_info_extractors(self):
 621         """
 622         Add the InfoExtractors returned by gen_extractors to the end of the list
 623         """
 624         for ie in gen_extractor_classes():
 625             self.add_info_extractor(ie)
 626
 627     def add_post_processor(self, pp, when='post_process'):
 628         """Add a PostProcessor object to the end of the chain."""
 629         self._pps[when].append(pp)
 630         pp.set_downloader(self)
 631
 632     def add_post_hook(self, ph):
 633         """Add the post hook"""
 634         self._post_hooks.append(ph)
 635
 636     def add_progress_hook(self, ph):
 637         """Add the progress hook (currently only for the file downloader)"""
 638         self._progress_hooks.append(ph)
 639
 640     def _bidi_workaround(self, message):
 641         if not hasattr(self, '_output_channel'):
 642             return message
 643
 644         assert hasattr(self, '_output_process')
 645         assert isinstance(message, compat_str)
 646         line_count = message.count('\n') + 1
 647         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 648         self._output_process.stdin.flush()
 649         res = ''.join(self._output_channel.readline().decode('utf-8')
 650                       for _ in range(line_count))
 651         return res[:-len('\n')]
 652
 653     def _write_string(self, s, out=None):
 654         write_string(s, out=out, encoding=self.params.get('encoding'))
 655
 656     def to_stdout(self, message, skip_eol=False, quiet=False):
 657         """Print message to stdout"""
 658         if self.params.get('logger'):
 659             self.params['logger'].debug(message)
 660         elif not quiet or self.params.get('verbose'):
 661             self._write_string(
 662                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 663                 self._err_file if quiet else self._screen_file)
 664
 665     def to_stderr(self, message):
 666         """Print message to stderr"""
 667         assert isinstance(message, compat_str)
 668         if self.params.get('logger'):
 669             self.params['logger'].error(message)
 670         else:
 671             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
 672
 673     def to_console_title(self, message):
 674         if not self.params.get('consoletitle', False):
 675             return
 676         if compat_os_name == 'nt':
 677             if ctypes.windll.kernel32.GetConsoleWindow():
 678                 # c_wchar_p() might not be necessary if `message` is
 679                 # already of type unicode()
 680                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 681         elif 'TERM' in os.environ:
 682             self._write_string('\033]0;%s\007' % message, self._screen_file)
 683
 684     def save_console_title(self):
 685         if not self.params.get('consoletitle', False):
 686             return
 687         if self.params.get('simulate', False):
 688             return
 689         if compat_os_name != 'nt' and 'TERM' in os.environ:
 690             # Save the title on stack
 691             self._write_string('\033[22;0t', self._screen_file)
 692
 693     def restore_console_title(self):
 694         if not self.params.get('consoletitle', False):
 695             return
 696         if self.params.get('simulate', False):
 697             return
 698         if compat_os_name != 'nt' and 'TERM' in os.environ:
 699             # Restore the title from stack
 700             self._write_string('\033[23;0t', self._screen_file)
 701
 702     def __enter__(self):
 703         self.save_console_title()
 704         return self
 705
 706     def __exit__(self, *args):
 707         self.restore_console_title()
 708
 709         if self.params.get('cookiefile') is not None:
 710             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 711
 712     def trouble(self, message=None, tb=None):
 713         """Determine action to take when a download problem appears.
 714
 715         Depending on if the downloader has been configured to ignore
 716         download errors or not, this method may throw an exception or
 717         not when errors are found, after printing the message.
 718
 719         tb, if given, is additional traceback information.
 720         """
 721         if message is not None:
 722             self.to_stderr(message)
 723         if self.params.get('verbose'):
 724             if tb is None:
 725                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 726                     tb = ''
 727                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 728                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 729                     tb += encode_compat_str(traceback.format_exc())
 730                 else:
 731                     tb_data = traceback.format_list(traceback.extract_stack())
 732                     tb = ''.join(tb_data)
 733             if tb:
 734                 self.to_stderr(tb)
 735         if not self.params.get('ignoreerrors', False):
 736             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 737                 exc_info = sys.exc_info()[1].exc_info
 738             else:
 739                 exc_info = sys.exc_info()
 740             raise DownloadError(message, exc_info)
 741         self._download_retcode = 1
 742
 743     def to_screen(self, message, skip_eol=False):
 744         """Print message to stdout if not in quiet mode"""
 745         self.to_stdout(
 746             message, skip_eol, quiet=self.params.get('quiet', False))
 747
 748     def report_warning(self, message):
 749         '''
 750         Print the message to stderr, it will be prefixed with 'WARNING:'
 751         If stderr is a tty file the 'WARNING:' will be colored
 752         '''
 753         if self.params.get('logger') is not None:
 754             self.params['logger'].warning(message)
 755         else:
 756             if self.params.get('no_warnings'):
 757                 return
 758             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 759                 _msg_header = '\033[0;33mWARNING:\033[0m'
 760             else:
 761                 _msg_header = 'WARNING:'
 762             warning_message = '%s %s' % (_msg_header, message)
 763             self.to_stderr(warning_message)
 764
 765     def report_error(self, message, tb=None):
 766         '''
 767         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 768         in red if stderr is a tty file.
 769         '''
 770         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 771             _msg_header = '\033[0;31mERROR:\033[0m'
 772         else:
 773             _msg_header = 'ERROR:'
 774         error_message = '%s %s' % (_msg_header, message)
 775         self.trouble(error_message, tb)
 776
 777     def write_debug(self, message):
 778         '''Log debug message or Print message to stderr'''
 779         if not self.params.get('verbose', False):
 780             return
 781         message = '[debug] %s' % message
 782         if self.params.get('logger'):
 783             self.params['logger'].debug(message)
 784         else:
 785             self._write_string('%s\n' % message)
 786
 787     def report_file_already_downloaded(self, file_name):
 788         """Report file has already been fully downloaded."""
 789         try:
 790             self.to_screen('[download] %s has already been downloaded' % file_name)
 791         except UnicodeEncodeError:
 792             self.to_screen('[download] The file has already been downloaded')
 793
 794     def report_file_delete(self, file_name):
 795         """Report that existing file will be deleted."""
 796         try:
 797             self.to_screen('Deleting existing file %s' % file_name)
 798         except UnicodeEncodeError:
 799             self.to_screen('Deleting existing file')
 800
 801     def parse_outtmpl(self):
 802         outtmpl_dict = self.params.get('outtmpl', {})
 803         if not isinstance(outtmpl_dict, dict):
 804             outtmpl_dict = {'default': outtmpl_dict}
 805         outtmpl_dict.update({
 806             k: v for k, v in DEFAULT_OUTTMPL.items()
 807             if not outtmpl_dict.get(k)})
 808         for key, val in outtmpl_dict.items():
 809             if isinstance(val, bytes):
 810                 self.report_warning(
 811                     'Parameter outtmpl is bytes, but should be a unicode string. '
 812                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 813         return outtmpl_dict
 814
 815     @staticmethod
 816     def validate_outtmpl(tmpl):
 817         ''' @return None or Exception object '''
 818         try:
 819             re.sub(
 820                 STR_FORMAT_RE.format(''),
 821                 lambda mobj: ('%' if not mobj.group('has_key') else '') + mobj.group(0),
 822                 tmpl
 823             ) % collections.defaultdict(int)
 824             return None
 825         except ValueError as err:
 826             return err
 827
 828     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 829         """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
 830         info_dict = dict(info_dict)
 831         na = self.params.get('outtmpl_na_placeholder', 'NA')
 832
 833         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 834             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
 835             if info_dict.get('duration', None) is not None
 836             else None)
 837         info_dict['epoch'] = int(time.time())
 838         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 839         if info_dict.get('resolution') is None:
 840             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
 841
 842         # For fields playlist_index and autonumber convert all occurrences
 843         # of %(field)s to %(field)0Nd for backward compatibility
 844         field_size_compat_map = {
 845             'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
 846             'autonumber': self.params.get('autonumber_size') or 5,
 847         }
 848
 849         TMPL_DICT = {}
 850         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*'))
 851         MATH_FUNCTIONS = {
 852             '+': float.__add__,
 853             '-': float.__sub__,
 854         }
 855         # Field is of the form key1.key2...
 856         # where keys (except first) can be string, int or slice
 857         FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
 858         MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
 859         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
 860         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
 861             (?P<negate>-)?
 862             (?P<fields>{field})
 863             (?P<maths>(?:{math_op}{math_field})*)
 864             (?:>(?P<strf_format>.+?))?
 865             (?:\|(?P<default>.*?))?
 866             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
 867
 868         get_key = lambda k: traverse_obj(
 869             info_dict, k.split('.'), is_user_input=True, traverse_string=True)
 870
 871         def get_value(mdict):
 872             # Object traversal
 873             value = get_key(mdict['fields'])
 874             # Negative
 875             if mdict['negate']:
 876                 value = float_or_none(value)
 877                 if value is not None:
 878                     value *= -1
 879             # Do maths
 880             offset_key = mdict['maths']
 881             if offset_key:
 882                 value = float_or_none(value)
 883                 operator = None
 884                 while offset_key:
 885                     item = re.match(
 886                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
 887                         offset_key).group(0)
 888                     offset_key = offset_key[len(item):]
 889                     if operator is None:
 890                         operator = MATH_FUNCTIONS[item]
 891                         continue
 892                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
 893                     offset = float_or_none(item)
 894                     if offset is None:
 895                         offset = float_or_none(get_key(item))
 896                     try:
 897                         value = operator(value, multiplier * offset)
 898                     except (TypeError, ZeroDivisionError):
 899                         return None
 900                     operator = None
 901             # Datetime formatting
 902             if mdict['strf_format']:
 903                 value = strftime_or_none(value, mdict['strf_format'])
 904
 905             return value
 906
 907         def create_key(outer_mobj):
 908             if not outer_mobj.group('has_key'):
 909                 return '%{}'.format(outer_mobj.group(0))
 910
 911             key = outer_mobj.group('key')
 912             fmt = outer_mobj.group('format')
 913             mobj = re.match(INTERNAL_FORMAT_RE, key)
 914             if mobj is None:
 915                 value, default = None, na
 916             else:
 917                 mobj = mobj.groupdict()
 918                 default = mobj['default'] if mobj['default'] is not None else na
 919                 value = get_value(mobj)
 920
 921             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
 922                 fmt = '0{:d}d'.format(field_size_compat_map[key])
 923
 924             value = default if value is None else value
 925             key += '\0%s' % fmt
 926
 927             if fmt == 'c':
 928                 value = compat_str(value)
 929                 if value is None:
 930                     value, fmt = default, 's'
 931                 else:
 932                     value = value[0]
 933             elif fmt[-1] not in 'rs':  # numeric
 934                 value = float_or_none(value)
 935                 if value is None:
 936                     value, fmt = default, 's'
 937             if sanitize:
 938                 if fmt[-1] == 'r':
 939                     # If value is an object, sanitize might convert it to a string
 940                     # So we convert it to repr first
 941                     value, fmt = repr(value), '%ss' % fmt[:-1]
 942                 if fmt[-1] in 'csr':
 943                     value = sanitize(key, value)
 944             TMPL_DICT[key] = value
 945             return '%({key}){fmt}'.format(key=key, fmt=fmt)
 946
 947         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
 948
 949     def _prepare_filename(self, info_dict, tmpl_type='default'):
 950         try:
 951             sanitize = lambda k, v: sanitize_filename(
 952                 compat_str(v),
 953                 restricted=self.params.get('restrictfilenames'),
 954                 is_id=(k == 'id' or k.endswith('_id')))
 955             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
 956             outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
 957
 958             # expand_path translates '%%' into '%' and '$$' into '$'
 959             # correspondingly that is not what we want since we need to keep
 960             # '%%' intact for template dict substitution step. Working around
 961             # with boundary-alike separator hack.
 962             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 963             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 964
 965             # outtmpl should be expand_path'ed before template dict substitution
 966             # because meta fields may contain env variables we don't want to
 967             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 968             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 969             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 970
 971             force_ext = OUTTMPL_TYPES.get(tmpl_type)
 972             if force_ext is not None:
 973                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
 974
 975             # https://github.com/blackjack4494/youtube-dlc/issues/85
 976             trim_file_name = self.params.get('trim_file_name', False)
 977             if trim_file_name:
 978                 fn_groups = filename.rsplit('.')
 979                 ext = fn_groups[-1]
 980                 sub_ext = ''
 981                 if len(fn_groups) > 2:
 982                     sub_ext = fn_groups[-2]
 983                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 984
 985             return filename
 986         except ValueError as err:
 987             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 988             return None
 989
 990     def prepare_filename(self, info_dict, dir_type='', warn=False):
 991         """Generate the output filename."""
 992         paths = self.params.get('paths', {})
 993         assert isinstance(paths, dict)
 994         filename = self._prepare_filename(info_dict, dir_type or 'default')
 995
 996         if warn and not self.__prepare_filename_warned:
 997             if not paths:
 998                 pass
 999             elif filename == '-':
1000                 self.report_warning('--paths is ignored when an outputting to stdout')
1001             elif os.path.isabs(filename):
1002                 self.report_warning('--paths is ignored since an absolute path is given in output template')
1003             self.__prepare_filename_warned = True
1004         if filename == '-' or not filename:
1005             return filename
1006
1007         homepath = expand_path(paths.get('home', '').strip())
1008         assert isinstance(homepath, compat_str)
1009         subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
1010         assert isinstance(subdir, compat_str)
1011         path = os.path.join(homepath, subdir, filename)
1012
1013         # Temporary fix for #4787
1014         # 'Treat' all problem characters by passing filename through preferredencoding
1015         # to workaround encoding issues with subprocess on python2 @ Windows
1016         if sys.version_info < (3, 0) and sys.platform == 'win32':
1017             path = encodeFilename(path, True).decode(preferredencoding())
1018         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1019
1020     def _match_entry(self, info_dict, incomplete=False, silent=False):
1021         """ Returns None if the file should be downloaded """
1022
1023         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1024
1025         def check_filter():
1026             if 'title' in info_dict:
1027                 # This can happen when we're just evaluating the playlist
1028                 title = info_dict['title']
1029                 matchtitle = self.params.get('matchtitle', False)
1030                 if matchtitle:
1031                     if not re.search(matchtitle, title, re.IGNORECASE):
1032                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1033                 rejecttitle = self.params.get('rejecttitle', False)
1034                 if rejecttitle:
1035                     if re.search(rejecttitle, title, re.IGNORECASE):
1036                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1037             date = info_dict.get('upload_date')
1038             if date is not None:
1039                 dateRange = self.params.get('daterange', DateRange())
1040                 if date not in dateRange:
1041                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1042             view_count = info_dict.get('view_count')
1043             if view_count is not None:
1044                 min_views = self.params.get('min_views')
1045                 if min_views is not None and view_count < min_views:
1046                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1047                 max_views = self.params.get('max_views')
1048                 if max_views is not None and view_count > max_views:
1049                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1050             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1051                 return 'Skipping "%s" because it is age restricted' % video_title
1052
1053             if not incomplete:
1054                 match_filter = self.params.get('match_filter')
1055                 if match_filter is not None:
1056                     ret = match_filter(info_dict)
1057                     if ret is not None:
1058                         return ret
1059             return None
1060
1061         if self.in_download_archive(info_dict):
1062             reason = '%s has already been recorded in the archive' % video_title
1063             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1064         else:
1065             reason = check_filter()
1066             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1067         if reason is not None:
1068             if not silent:
1069                 self.to_screen('[download] ' + reason)
1070             if self.params.get(break_opt, False):
1071                 raise break_err()
1072         return reason
1073
1074     @staticmethod
1075     def add_extra_info(info_dict, extra_info):
1076         '''Set the keys from extra_info in info dict if they are missing'''
1077         for key, value in extra_info.items():
1078             info_dict.setdefault(key, value)
1079
1080     def extract_info(self, url, download=True, ie_key=None, extra_info={},
1081                      process=True, force_generic_extractor=False):
1082         """
1083         Return a list with a dictionary for each video extracted.
1084
1085         Arguments:
1086         url -- URL to extract
1087
1088         Keyword arguments:
1089         download -- whether to download videos during extraction
1090         ie_key -- extractor key hint
1091         extra_info -- dictionary containing the extra values to add to each result
1092         process -- whether to resolve all unresolved references (URLs, playlist items),
1093             must be True for download to work.
1094         force_generic_extractor -- force using the generic extractor
1095         """
1096
1097         if not ie_key and force_generic_extractor:
1098             ie_key = 'Generic'
1099
1100         if ie_key:
1101             ies = [self.get_info_extractor(ie_key)]
1102         else:
1103             ies = self._ies
1104
1105         for ie in ies:
1106             if not ie.suitable(url):
1107                 continue
1108
1109             ie_key = ie.ie_key()
1110             ie = self.get_info_extractor(ie_key)
1111             if not ie.working():
1112                 self.report_warning('The program functionality for this site has been marked as broken, '
1113                                     'and will probably not work.')
1114
1115             try:
1116                 temp_id = str_or_none(
1117                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1118                     else ie._match_id(url))
1119             except (AssertionError, IndexError, AttributeError):
1120                 temp_id = None
1121             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1122                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1123                                ie_key, temp_id))
1124                 break
1125             return self.__extract_info(url, ie, download, extra_info, process)
1126         else:
1127             self.report_error('no suitable InfoExtractor for URL %s' % url)
1128
1129     def __handle_extraction_exceptions(func):
1130         def wrapper(self, *args, **kwargs):
1131             try:
1132                 return func(self, *args, **kwargs)
1133             except GeoRestrictedError as e:
1134                 msg = e.msg
1135                 if e.countries:
1136                     msg += '\nThis video is available in %s.' % ', '.join(
1137                         map(ISO3166Utils.short2full, e.countries))
1138                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1139                 self.report_error(msg)
1140             except ExtractorError as e:  # An error we somewhat expected
1141                 self.report_error(compat_str(e), e.format_traceback())
1142             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1143                 raise
1144             except Exception as e:
1145                 if self.params.get('ignoreerrors', False):
1146                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1147                 else:
1148                     raise
1149         return wrapper
1150
1151     @__handle_extraction_exceptions
1152     def __extract_info(self, url, ie, download, extra_info, process):
1153         ie_result = ie.extract(url)
1154         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1155             return
1156         if isinstance(ie_result, list):
1157             # Backwards compatibility: old IE result format
1158             ie_result = {
1159                 '_type': 'compat_list',
1160                 'entries': ie_result,
1161             }
1162         self.add_default_extra_info(ie_result, ie, url)
1163         if process:
1164             return self.process_ie_result(ie_result, download, extra_info)
1165         else:
1166             return ie_result
1167
1168     def add_default_extra_info(self, ie_result, ie, url):
1169         self.add_extra_info(ie_result, {
1170             'extractor': ie.IE_NAME,
1171             'webpage_url': url,
1172             'original_url': url,
1173             'webpage_url_basename': url_basename(url),
1174             'extractor_key': ie.ie_key(),
1175         })
1176
1177     def process_ie_result(self, ie_result, download=True, extra_info={}):
1178         """
1179         Take the result of the ie(may be modified) and resolve all unresolved
1180         references (URLs, playlist items).
1181
1182         It will also download the videos if 'download'.
1183         Returns the resolved ie_result.
1184         """
1185         result_type = ie_result.get('_type', 'video')
1186
1187         if result_type in ('url', 'url_transparent'):
1188             ie_result['url'] = sanitize_url(ie_result['url'])
1189             extract_flat = self.params.get('extract_flat', False)
1190             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1191                     or extract_flat is True):
1192                 info_copy = ie_result.copy()
1193                 self.add_extra_info(info_copy, extra_info)
1194                 self.add_default_extra_info(
1195                     info_copy, self.get_info_extractor(ie_result.get('ie_key')), ie_result['url'])
1196                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1197                 return ie_result
1198
1199         if result_type == 'video':
1200             self.add_extra_info(ie_result, extra_info)
1201             ie_result = self.process_video_result(ie_result, download=download)
1202             additional_urls = (ie_result or {}).get('additional_urls')
1203             if additional_urls:
1204                 # TODO: Improve MetadataFromFieldPP to allow setting a list
1205                 if isinstance(additional_urls, compat_str):
1206                     additional_urls = [additional_urls]
1207                 self.to_screen(
1208                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1209                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1210                 ie_result['additional_entries'] = [
1211                     self.extract_info(
1212                         url, download, extra_info,
1213                         force_generic_extractor=self.params.get('force_generic_extractor'))
1214                     for url in additional_urls
1215                 ]
1216             return ie_result
1217         elif result_type == 'url':
1218             # We have to add extra_info to the results because it may be
1219             # contained in a playlist
1220             return self.extract_info(
1221                 ie_result['url'], download,
1222                 ie_key=ie_result.get('ie_key'),
1223                 extra_info=extra_info)
1224         elif result_type == 'url_transparent':
1225             # Use the information from the embedding page
1226             info = self.extract_info(
1227                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1228                 extra_info=extra_info, download=False, process=False)
1229
1230             # extract_info may return None when ignoreerrors is enabled and
1231             # extraction failed with an error, don't crash and return early
1232             # in this case
1233             if not info:
1234                 return info
1235
1236             force_properties = dict(
1237                 (k, v) for k, v in ie_result.items() if v is not None)
1238             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1239                 if f in force_properties:
1240                     del force_properties[f]
1241             new_result = info.copy()
1242             new_result.update(force_properties)
1243
1244             # Extracted info may not be a video result (i.e.
1245             # info.get('_type', 'video') != video) but rather an url or
1246             # url_transparent. In such cases outer metadata (from ie_result)
1247             # should be propagated to inner one (info). For this to happen
1248             # _type of info should be overridden with url_transparent. This
1249             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1250             if new_result.get('_type') == 'url':
1251                 new_result['_type'] = 'url_transparent'
1252
1253             return self.process_ie_result(
1254                 new_result, download=download, extra_info=extra_info)
1255         elif result_type in ('playlist', 'multi_video'):
1256             # Protect from infinite recursion due to recursively nested playlists
1257             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1258             webpage_url = ie_result['webpage_url']
1259             if webpage_url in self._playlist_urls:
1260                 self.to_screen(
1261                     '[download] Skipping already downloaded playlist: %s'
1262                     % ie_result.get('title') or ie_result.get('id'))
1263                 return
1264
1265             self._playlist_level += 1
1266             self._playlist_urls.add(webpage_url)
1267             self._sanitize_thumbnails(ie_result)
1268             try:
1269                 return self.__process_playlist(ie_result, download)
1270             finally:
1271                 self._playlist_level -= 1
1272                 if not self._playlist_level:
1273                     self._playlist_urls.clear()
1274         elif result_type == 'compat_list':
1275             self.report_warning(
1276                 'Extractor %s returned a compat_list result. '
1277                 'It needs to be updated.' % ie_result.get('extractor'))
1278
1279             def _fixup(r):
1280                 self.add_extra_info(
1281                     r,
1282                     {
1283                         'extractor': ie_result['extractor'],
1284                         'webpage_url': ie_result['webpage_url'],
1285                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1286                         'extractor_key': ie_result['extractor_key'],
1287                     }
1288                 )
1289                 return r
1290             ie_result['entries'] = [
1291                 self.process_ie_result(_fixup(r), download, extra_info)
1292                 for r in ie_result['entries']
1293             ]
1294             return ie_result
1295         else:
1296             raise Exception('Invalid result type: %s' % result_type)
1297
1298     def _ensure_dir_exists(self, path):
1299         return make_dir(path, self.report_error)
1300
1301     def __process_playlist(self, ie_result, download):
1302         # We process each entry in the playlist
1303         playlist = ie_result.get('title') or ie_result.get('id')
1304         self.to_screen('[download] Downloading playlist: %s' % playlist)
1305
1306         if 'entries' not in ie_result:
1307             raise EntryNotInPlaylist()
1308         incomplete_entries = bool(ie_result.get('requested_entries'))
1309         if incomplete_entries:
1310             def fill_missing_entries(entries, indexes):
1311                 ret = [None] * max(*indexes)
1312                 for i, entry in zip(indexes, entries):
1313                     ret[i - 1] = entry
1314                 return ret
1315             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1316
1317         playlist_results = []
1318
1319         playliststart = self.params.get('playliststart', 1)
1320         playlistend = self.params.get('playlistend')
1321         # For backwards compatibility, interpret -1 as whole list
1322         if playlistend == -1:
1323             playlistend = None
1324
1325         playlistitems_str = self.params.get('playlist_items')
1326         playlistitems = None
1327         if playlistitems_str is not None:
1328             def iter_playlistitems(format):
1329                 for string_segment in format.split(','):
1330                     if '-' in string_segment:
1331                         start, end = string_segment.split('-')
1332                         for item in range(int(start), int(end) + 1):
1333                             yield int(item)
1334                     else:
1335                         yield int(string_segment)
1336             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1337
1338         ie_entries = ie_result['entries']
1339         msg = (
1340             'Downloading %d videos' if not isinstance(ie_entries, list)
1341             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1342         if not isinstance(ie_entries, (list, PagedList)):
1343             ie_entries = LazyList(ie_entries)
1344
1345         entries = []
1346         for i in playlistitems or itertools.count(playliststart):
1347             if playlistitems is None and playlistend is not None and playlistend < i:
1348                 break
1349             entry = None
1350             try:
1351                 entry = ie_entries[i - 1]
1352                 if entry is None:
1353                     raise EntryNotInPlaylist()
1354             except (IndexError, EntryNotInPlaylist):
1355                 if incomplete_entries:
1356                     raise EntryNotInPlaylist()
1357                 elif not playlistitems:
1358                     break
1359             entries.append(entry)
1360             try:
1361                 if entry is not None:
1362                     self._match_entry(entry, incomplete=True, silent=True)
1363             except (ExistingVideoReached, RejectedVideoReached):
1364                 break
1365         ie_result['entries'] = entries
1366
1367         # Save playlist_index before re-ordering
1368         entries = [
1369             ((playlistitems[i - 1] if playlistitems else i), entry)
1370             for i, entry in enumerate(entries, 1)
1371             if entry is not None]
1372         n_entries = len(entries)
1373
1374         if not playlistitems and (playliststart or playlistend):
1375             playlistitems = list(range(playliststart, playliststart + n_entries))
1376         ie_result['requested_entries'] = playlistitems
1377
1378         if self.params.get('allow_playlist_files', True):
1379             ie_copy = {
1380                 'playlist': playlist,
1381                 'playlist_id': ie_result.get('id'),
1382                 'playlist_title': ie_result.get('title'),
1383                 'playlist_uploader': ie_result.get('uploader'),
1384                 'playlist_uploader_id': ie_result.get('uploader_id'),
1385                 'playlist_index': 0,
1386             }
1387             ie_copy.update(dict(ie_result))
1388
1389             if self.params.get('writeinfojson', False):
1390                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1391                 if not self._ensure_dir_exists(encodeFilename(infofn)):
1392                     return
1393                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1394                     self.to_screen('[info] Playlist metadata is already present')
1395                 else:
1396                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1397                     try:
1398                         write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1399                     except (OSError, IOError):
1400                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1401
1402             # TODO: This should be passed to ThumbnailsConvertor if necessary
1403             self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1404
1405             if self.params.get('writedescription', False):
1406                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1407                 if not self._ensure_dir_exists(encodeFilename(descfn)):
1408                     return
1409                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1410                     self.to_screen('[info] Playlist description is already present')
1411                 elif ie_result.get('description') is None:
1412                     self.report_warning('There\'s no playlist description to write.')
1413                 else:
1414                     try:
1415                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1416                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1417                             descfile.write(ie_result['description'])
1418                     except (OSError, IOError):
1419                         self.report_error('Cannot write playlist description file ' + descfn)
1420                         return
1421
1422         if self.params.get('playlistreverse', False):
1423             entries = entries[::-1]
1424         if self.params.get('playlistrandom', False):
1425             random.shuffle(entries)
1426
1427         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1428
1429         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1430         failures = 0
1431         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1432         for i, entry_tuple in enumerate(entries, 1):
1433             playlist_index, entry = entry_tuple
1434             if 'playlist_index' in self.params.get('compat_options', []):
1435                 playlist_index = playlistitems[i - 1] if playlistitems else i
1436             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1437             # This __x_forwarded_for_ip thing is a bit ugly but requires
1438             # minimal changes
1439             if x_forwarded_for:
1440                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1441             extra = {
1442                 'n_entries': n_entries,
1443                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1444                 'playlist_index': playlist_index,
1445                 'playlist_autonumber': i,
1446                 'playlist': playlist,
1447                 'playlist_id': ie_result.get('id'),
1448                 'playlist_title': ie_result.get('title'),
1449                 'playlist_uploader': ie_result.get('uploader'),
1450                 'playlist_uploader_id': ie_result.get('uploader_id'),
1451                 'extractor': ie_result['extractor'],
1452                 'webpage_url': ie_result['webpage_url'],
1453                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1454                 'extractor_key': ie_result['extractor_key'],
1455             }
1456
1457             if self._match_entry(entry, incomplete=True) is not None:
1458                 continue
1459
1460             entry_result = self.__process_iterable_entry(entry, download, extra)
1461             if not entry_result:
1462                 failures += 1
1463             if failures >= max_failures:
1464                 self.report_error(
1465                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1466                 break
1467             # TODO: skip failed (empty) entries?
1468             playlist_results.append(entry_result)
1469         ie_result['entries'] = playlist_results
1470         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1471         return ie_result
1472
1473     @__handle_extraction_exceptions
1474     def __process_iterable_entry(self, entry, download, extra_info):
1475         return self.process_ie_result(
1476             entry, download=download, extra_info=extra_info)
1477
1478     def _build_format_filter(self, filter_spec):
1479         " Returns a function to filter the formats according to the filter_spec "
1480
1481         OPERATORS = {
1482             '<': operator.lt,
1483             '<=': operator.le,
1484             '>': operator.gt,
1485             '>=': operator.ge,
1486             '=': operator.eq,
1487             '!=': operator.ne,
1488         }
1489         operator_rex = re.compile(r'''(?x)\s*
1490             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1491             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1492             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1493             $
1494             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1495         m = operator_rex.search(filter_spec)
1496         if m:
1497             try:
1498                 comparison_value = int(m.group('value'))
1499             except ValueError:
1500                 comparison_value = parse_filesize(m.group('value'))
1501                 if comparison_value is None:
1502                     comparison_value = parse_filesize(m.group('value') + 'B')
1503                 if comparison_value is None:
1504                     raise ValueError(
1505                         'Invalid value %r in format specification %r' % (
1506                             m.group('value'), filter_spec))
1507             op = OPERATORS[m.group('op')]
1508
1509         if not m:
1510             STR_OPERATORS = {
1511                 '=': operator.eq,
1512                 '^=': lambda attr, value: attr.startswith(value),
1513                 '$=': lambda attr, value: attr.endswith(value),
1514                 '*=': lambda attr, value: value in attr,
1515             }
1516             str_operator_rex = re.compile(r'''(?x)
1517                 \s*(?P<key>[a-zA-Z0-9._-]+)
1518                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1519                 \s*(?P<value>[a-zA-Z0-9._-]+)
1520                 \s*$
1521                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1522             m = str_operator_rex.search(filter_spec)
1523             if m:
1524                 comparison_value = m.group('value')
1525                 str_op = STR_OPERATORS[m.group('op')]
1526                 if m.group('negation'):
1527                     op = lambda attr, value: not str_op(attr, value)
1528                 else:
1529                     op = str_op
1530
1531         if not m:
1532             raise ValueError('Invalid filter specification %r' % filter_spec)
1533
1534         def _filter(f):
1535             actual_value = f.get(m.group('key'))
1536             if actual_value is None:
1537                 return m.group('none_inclusive')
1538             return op(actual_value, comparison_value)
1539         return _filter
1540
1541     def _default_format_spec(self, info_dict, download=True):
1542
1543         def can_merge():
1544             merger = FFmpegMergerPP(self)
1545             return merger.available and merger.can_merge()
1546
1547         prefer_best = (
1548             not self.params.get('simulate', False)
1549             and download
1550             and (
1551                 not can_merge()
1552                 or info_dict.get('is_live', False)
1553                 or self.outtmpl_dict['default'] == '-'))
1554         compat = (
1555             prefer_best
1556             or self.params.get('allow_multiple_audio_streams', False)
1557             or 'format-spec' in self.params.get('compat_opts', []))
1558
1559         return (
1560             'best/bestvideo+bestaudio' if prefer_best
1561             else 'bestvideo*+bestaudio/best' if not compat
1562             else 'bestvideo+bestaudio/best')
1563
1564     def build_format_selector(self, format_spec):
1565         def syntax_error(note, start):
1566             message = (
1567                 'Invalid format specification: '
1568                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1569             return SyntaxError(message)
1570
1571         PICKFIRST = 'PICKFIRST'
1572         MERGE = 'MERGE'
1573         SINGLE = 'SINGLE'
1574         GROUP = 'GROUP'
1575         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1576
1577         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1578                                   'video': self.params.get('allow_multiple_video_streams', False)}
1579
1580         check_formats = self.params.get('check_formats')
1581
1582         def _parse_filter(tokens):
1583             filter_parts = []
1584             for type, string, start, _, _ in tokens:
1585                 if type == tokenize.OP and string == ']':
1586                     return ''.join(filter_parts)
1587                 else:
1588                     filter_parts.append(string)
1589
1590         def _remove_unused_ops(tokens):
1591             # Remove operators that we don't use and join them with the surrounding strings
1592             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1593             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1594             last_string, last_start, last_end, last_line = None, None, None, None
1595             for type, string, start, end, line in tokens:
1596                 if type == tokenize.OP and string == '[':
1597                     if last_string:
1598                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1599                         last_string = None
1600                     yield type, string, start, end, line
1601                     # everything inside brackets will be handled by _parse_filter
1602                     for type, string, start, end, line in tokens:
1603                         yield type, string, start, end, line
1604                         if type == tokenize.OP and string == ']':
1605                             break
1606                 elif type == tokenize.OP and string in ALLOWED_OPS:
1607                     if last_string:
1608                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1609                         last_string = None
1610                     yield type, string, start, end, line
1611                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1612                     if not last_string:
1613                         last_string = string
1614                         last_start = start
1615                         last_end = end
1616                     else:
1617                         last_string += string
1618             if last_string:
1619                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1620
1621         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1622             selectors = []
1623             current_selector = None
1624             for type, string, start, _, _ in tokens:
1625                 # ENCODING is only defined in python 3.x
1626                 if type == getattr(tokenize, 'ENCODING', None):
1627                     continue
1628                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1629                     current_selector = FormatSelector(SINGLE, string, [])
1630                 elif type == tokenize.OP:
1631                     if string == ')':
1632                         if not inside_group:
1633                             # ')' will be handled by the parentheses group
1634                             tokens.restore_last_token()
1635                         break
1636                     elif inside_merge and string in ['/', ',']:
1637                         tokens.restore_last_token()
1638                         break
1639                     elif inside_choice and string == ',':
1640                         tokens.restore_last_token()
1641                         break
1642                     elif string == ',':
1643                         if not current_selector:
1644                             raise syntax_error('"," must follow a format selector', start)
1645                         selectors.append(current_selector)
1646                         current_selector = None
1647                     elif string == '/':
1648                         if not current_selector:
1649                             raise syntax_error('"/" must follow a format selector', start)
1650                         first_choice = current_selector
1651                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1652                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1653                     elif string == '[':
1654                         if not current_selector:
1655                             current_selector = FormatSelector(SINGLE, 'best', [])
1656                         format_filter = _parse_filter(tokens)
1657                         current_selector.filters.append(format_filter)
1658                     elif string == '(':
1659                         if current_selector:
1660                             raise syntax_error('Unexpected "("', start)
1661                         group = _parse_format_selection(tokens, inside_group=True)
1662                         current_selector = FormatSelector(GROUP, group, [])
1663                     elif string == '+':
1664                         if not current_selector:
1665                             raise syntax_error('Unexpected "+"', start)
1666                         selector_1 = current_selector
1667                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1668                         if not selector_2:
1669                             raise syntax_error('Expected a selector', start)
1670                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1671                     else:
1672                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1673                 elif type == tokenize.ENDMARKER:
1674                     break
1675             if current_selector:
1676                 selectors.append(current_selector)
1677             return selectors
1678
1679         def _merge(formats_pair):
1680             format_1, format_2 = formats_pair
1681
1682             formats_info = []
1683             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1684             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1685
1686             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1687                 get_no_more = {"video": False, "audio": False}
1688                 for (i, fmt_info) in enumerate(formats_info):
1689                     for aud_vid in ["audio", "video"]:
1690                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1691                             if get_no_more[aud_vid]:
1692                                 formats_info.pop(i)
1693                             get_no_more[aud_vid] = True
1694
1695             if len(formats_info) == 1:
1696                 return formats_info[0]
1697
1698             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1699             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1700
1701             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1702             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1703
1704             output_ext = self.params.get('merge_output_format')
1705             if not output_ext:
1706                 if the_only_video:
1707                     output_ext = the_only_video['ext']
1708                 elif the_only_audio and not video_fmts:
1709                     output_ext = the_only_audio['ext']
1710                 else:
1711                     output_ext = 'mkv'
1712
1713             new_dict = {
1714                 'requested_formats': formats_info,
1715                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1716                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1717                 'ext': output_ext,
1718             }
1719
1720             if the_only_video:
1721                 new_dict.update({
1722                     'width': the_only_video.get('width'),
1723                     'height': the_only_video.get('height'),
1724                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1725                     'fps': the_only_video.get('fps'),
1726                     'vcodec': the_only_video.get('vcodec'),
1727                     'vbr': the_only_video.get('vbr'),
1728                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1729                 })
1730
1731             if the_only_audio:
1732                 new_dict.update({
1733                     'acodec': the_only_audio.get('acodec'),
1734                     'abr': the_only_audio.get('abr'),
1735                 })
1736
1737             return new_dict
1738
1739         def _check_formats(formats):
1740             for f in formats:
1741                 self.to_screen('[info] Testing format %s' % f['format_id'])
1742                 paths = self.params.get('paths', {})
1743                 temp_file = os.path.join(
1744                     expand_path(paths.get('home', '').strip()),
1745                     expand_path(paths.get('temp', '').strip()),
1746                     'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
1747                 try:
1748                     dl, _ = self.dl(temp_file, f, test=True)
1749                 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
1750                     dl = False
1751                 finally:
1752                     if os.path.exists(temp_file):
1753                         os.remove(temp_file)
1754                 if dl:
1755                     yield f
1756                 else:
1757                     self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1758
1759         def _build_selector_function(selector):
1760             if isinstance(selector, list):  # ,
1761                 fs = [_build_selector_function(s) for s in selector]
1762
1763                 def selector_function(ctx):
1764                     for f in fs:
1765                         for format in f(ctx):
1766                             yield format
1767                 return selector_function
1768
1769             elif selector.type == GROUP:  # ()
1770                 selector_function = _build_selector_function(selector.selector)
1771
1772             elif selector.type == PICKFIRST:  # /
1773                 fs = [_build_selector_function(s) for s in selector.selector]
1774
1775                 def selector_function(ctx):
1776                     for f in fs:
1777                         picked_formats = list(f(ctx))
1778                         if picked_formats:
1779                             return picked_formats
1780                     return []
1781
1782             elif selector.type == SINGLE:  # atom
1783                 format_spec = selector.selector or 'best'
1784
1785                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1786                 if format_spec == 'all':
1787                     def selector_function(ctx):
1788                         formats = list(ctx['formats'])
1789                         if check_formats:
1790                             formats = _check_formats(formats)
1791                         for f in formats:
1792                             yield f
1793                 elif format_spec == 'mergeall':
1794                     def selector_function(ctx):
1795                         formats = list(_check_formats(ctx['formats']))
1796                         if not formats:
1797                             return
1798                         merged_format = formats[-1]
1799                         for f in formats[-2::-1]:
1800                             merged_format = _merge((merged_format, f))
1801                         yield merged_format
1802
1803                 else:
1804                     format_fallback, format_reverse, format_idx = False, True, 1
1805                     mobj = re.match(
1806                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1807                         format_spec)
1808                     if mobj is not None:
1809                         format_idx = int_or_none(mobj.group('n'), default=1)
1810                         format_reverse = mobj.group('bw')[0] == 'b'
1811                         format_type = (mobj.group('type') or [None])[0]
1812                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1813                         format_modified = mobj.group('mod') is not None
1814
1815                         format_fallback = not format_type and not format_modified  # for b, w
1816                         filter_f = (
1817                             (lambda f: f.get('%scodec' % format_type) != 'none')
1818                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1819                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1820                             if format_type  # bv, ba, wv, wa
1821                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1822                             if not format_modified  # b, w
1823                             else None)  # b*, w*
1824                     else:
1825                         filter_f = ((lambda f: f.get('ext') == format_spec)
1826                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1827                                     else (lambda f: f.get('format_id') == format_spec))  # id
1828
1829                     def selector_function(ctx):
1830                         formats = list(ctx['formats'])
1831                         if not formats:
1832                             return
1833                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1834                         if format_fallback and ctx['incomplete_formats'] and not matches:
1835                             # for extractors with incomplete formats (audio only (soundcloud)
1836                             # or video only (imgur)) best/worst will fallback to
1837                             # best/worst {video,audio}-only format
1838                             matches = formats
1839                         if format_reverse:
1840                             matches = matches[::-1]
1841                         if check_formats:
1842                             matches = list(itertools.islice(_check_formats(matches), format_idx))
1843                         n = len(matches)
1844                         if -n <= format_idx - 1 < n:
1845                             yield matches[format_idx - 1]
1846
1847             elif selector.type == MERGE:        # +
1848                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1849
1850                 def selector_function(ctx):
1851                     for pair in itertools.product(
1852                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1853                         yield _merge(pair)
1854
1855             filters = [self._build_format_filter(f) for f in selector.filters]
1856
1857             def final_selector(ctx):
1858                 ctx_copy = copy.deepcopy(ctx)
1859                 for _filter in filters:
1860                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1861                 return selector_function(ctx_copy)
1862             return final_selector
1863
1864         stream = io.BytesIO(format_spec.encode('utf-8'))
1865         try:
1866             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1867         except tokenize.TokenError:
1868             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1869
1870         class TokenIterator(object):
1871             def __init__(self, tokens):
1872                 self.tokens = tokens
1873                 self.counter = 0
1874
1875             def __iter__(self):
1876                 return self
1877
1878             def __next__(self):
1879                 if self.counter >= len(self.tokens):
1880                     raise StopIteration()
1881                 value = self.tokens[self.counter]
1882                 self.counter += 1
1883                 return value
1884
1885             next = __next__
1886
1887             def restore_last_token(self):
1888                 self.counter -= 1
1889
1890         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1891         return _build_selector_function(parsed_selector)
1892
1893     def _calc_headers(self, info_dict):
1894         res = std_headers.copy()
1895
1896         add_headers = info_dict.get('http_headers')
1897         if add_headers:
1898             res.update(add_headers)
1899
1900         cookies = self._calc_cookies(info_dict)
1901         if cookies:
1902             res['Cookie'] = cookies
1903
1904         if 'X-Forwarded-For' not in res:
1905             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1906             if x_forwarded_for_ip:
1907                 res['X-Forwarded-For'] = x_forwarded_for_ip
1908
1909         return res
1910
1911     def _calc_cookies(self, info_dict):
1912         pr = sanitized_Request(info_dict['url'])
1913         self.cookiejar.add_cookie_header(pr)
1914         return pr.get_header('Cookie')
1915
1916     @staticmethod
1917     def _sanitize_thumbnails(info_dict):
1918         thumbnails = info_dict.get('thumbnails')
1919         if thumbnails is None:
1920             thumbnail = info_dict.get('thumbnail')
1921             if thumbnail:
1922                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1923         if thumbnails:
1924             thumbnails.sort(key=lambda t: (
1925                 t.get('preference') if t.get('preference') is not None else -1,
1926                 t.get('width') if t.get('width') is not None else -1,
1927                 t.get('height') if t.get('height') is not None else -1,
1928                 t.get('id') if t.get('id') is not None else '',
1929                 t.get('url')))
1930             for i, t in enumerate(thumbnails):
1931                 t['url'] = sanitize_url(t['url'])
1932                 if t.get('width') and t.get('height'):
1933                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1934                 if t.get('id') is None:
1935                     t['id'] = '%d' % i
1936
1937     def process_video_result(self, info_dict, download=True):
1938         assert info_dict.get('_type', 'video') == 'video'
1939
1940         if 'id' not in info_dict:
1941             raise ExtractorError('Missing "id" field in extractor result')
1942         if 'title' not in info_dict:
1943             raise ExtractorError('Missing "title" field in extractor result')
1944
1945         def report_force_conversion(field, field_not, conversion):
1946             self.report_warning(
1947                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1948                 % (field, field_not, conversion))
1949
1950         def sanitize_string_field(info, string_field):
1951             field = info.get(string_field)
1952             if field is None or isinstance(field, compat_str):
1953                 return
1954             report_force_conversion(string_field, 'a string', 'string')
1955             info[string_field] = compat_str(field)
1956
1957         def sanitize_numeric_fields(info):
1958             for numeric_field in self._NUMERIC_FIELDS:
1959                 field = info.get(numeric_field)
1960                 if field is None or isinstance(field, compat_numeric_types):
1961                     continue
1962                 report_force_conversion(numeric_field, 'numeric', 'int')
1963                 info[numeric_field] = int_or_none(field)
1964
1965         sanitize_string_field(info_dict, 'id')
1966         sanitize_numeric_fields(info_dict)
1967
1968         if 'playlist' not in info_dict:
1969             # It isn't part of a playlist
1970             info_dict['playlist'] = None
1971             info_dict['playlist_index'] = None
1972
1973         self._sanitize_thumbnails(info_dict)
1974
1975         if self.params.get('list_thumbnails'):
1976             self.list_thumbnails(info_dict)
1977             return
1978
1979         thumbnail = info_dict.get('thumbnail')
1980         thumbnails = info_dict.get('thumbnails')
1981         if thumbnail:
1982             info_dict['thumbnail'] = sanitize_url(thumbnail)
1983         elif thumbnails:
1984             info_dict['thumbnail'] = thumbnails[-1]['url']
1985
1986         if 'display_id' not in info_dict and 'id' in info_dict:
1987             info_dict['display_id'] = info_dict['id']
1988
1989         for ts_key, date_key in (
1990                 ('timestamp', 'upload_date'),
1991                 ('release_timestamp', 'release_date'),
1992         ):
1993             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
1994                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1995                 # see http://bugs.python.org/issue1646728)
1996                 try:
1997                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
1998                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
1999                 except (ValueError, OverflowError, OSError):
2000                     pass
2001
2002         # Auto generate title fields corresponding to the *_number fields when missing
2003         # in order to always have clean titles. This is very common for TV series.
2004         for field in ('chapter', 'season', 'episode'):
2005             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2006                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2007
2008         for cc_kind in ('subtitles', 'automatic_captions'):
2009             cc = info_dict.get(cc_kind)
2010             if cc:
2011                 for _, subtitle in cc.items():
2012                     for subtitle_format in subtitle:
2013                         if subtitle_format.get('url'):
2014                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2015                         if subtitle_format.get('ext') is None:
2016                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2017
2018         automatic_captions = info_dict.get('automatic_captions')
2019         subtitles = info_dict.get('subtitles')
2020
2021         if self.params.get('listsubtitles', False):
2022             if 'automatic_captions' in info_dict:
2023                 self.list_subtitles(
2024                     info_dict['id'], automatic_captions, 'automatic captions')
2025             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2026             return
2027
2028         info_dict['requested_subtitles'] = self.process_subtitles(
2029             info_dict['id'], subtitles, automatic_captions)
2030
2031         # We now pick which formats have to be downloaded
2032         if info_dict.get('formats') is None:
2033             # There's only one format available
2034             formats = [info_dict]
2035         else:
2036             formats = info_dict['formats']
2037
2038         if not formats:
2039             if not self.params.get('ignore_no_formats_error'):
2040                 raise ExtractorError('No video formats found!')
2041             else:
2042                 self.report_warning('No video formats found!')
2043
2044         def is_wellformed(f):
2045             url = f.get('url')
2046             if not url:
2047                 self.report_warning(
2048                     '"url" field is missing or empty - skipping format, '
2049                     'there is an error in extractor')
2050                 return False
2051             if isinstance(url, bytes):
2052                 sanitize_string_field(f, 'url')
2053             return True
2054
2055         # Filter out malformed formats for better extraction robustness
2056         formats = list(filter(is_wellformed, formats))
2057
2058         formats_dict = {}
2059
2060         # We check that all the formats have the format and format_id fields
2061         for i, format in enumerate(formats):
2062             sanitize_string_field(format, 'format_id')
2063             sanitize_numeric_fields(format)
2064             format['url'] = sanitize_url(format['url'])
2065             if not format.get('format_id'):
2066                 format['format_id'] = compat_str(i)
2067             else:
2068                 # Sanitize format_id from characters used in format selector expression
2069                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2070             format_id = format['format_id']
2071             if format_id not in formats_dict:
2072                 formats_dict[format_id] = []
2073             formats_dict[format_id].append(format)
2074
2075         # Make sure all formats have unique format_id
2076         for format_id, ambiguous_formats in formats_dict.items():
2077             if len(ambiguous_formats) > 1:
2078                 for i, format in enumerate(ambiguous_formats):
2079                     format['format_id'] = '%s-%d' % (format_id, i)
2080
2081         for i, format in enumerate(formats):
2082             if format.get('format') is None:
2083                 format['format'] = '{id} - {res}{note}'.format(
2084                     id=format['format_id'],
2085                     res=self.format_resolution(format),
2086                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
2087                 )
2088             # Automatically determine file extension if missing
2089             if format.get('ext') is None:
2090                 format['ext'] = determine_ext(format['url']).lower()
2091             # Automatically determine protocol if missing (useful for format
2092             # selection purposes)
2093             if format.get('protocol') is None:
2094                 format['protocol'] = determine_protocol(format)
2095             # Add HTTP headers, so that external programs can use them from the
2096             # json output
2097             full_format_info = info_dict.copy()
2098             full_format_info.update(format)
2099             format['http_headers'] = self._calc_headers(full_format_info)
2100         # Remove private housekeeping stuff
2101         if '__x_forwarded_for_ip' in info_dict:
2102             del info_dict['__x_forwarded_for_ip']
2103
2104         # TODO Central sorting goes here
2105
2106         if formats and formats[0] is not info_dict:
2107             # only set the 'formats' fields if the original info_dict list them
2108             # otherwise we end up with a circular reference, the first (and unique)
2109             # element in the 'formats' field in info_dict is info_dict itself,
2110             # which can't be exported to json
2111             info_dict['formats'] = formats
2112
2113         info_dict, _ = self.pre_process(info_dict)
2114
2115         if self.params.get('listformats'):
2116             if not info_dict.get('formats'):
2117                 raise ExtractorError('No video formats found', expected=True)
2118             self.list_formats(info_dict)
2119             return
2120
2121         req_format = self.params.get('format')
2122         if req_format is None:
2123             req_format = self._default_format_spec(info_dict, download=download)
2124             self.write_debug('Default format spec: %s' % req_format)
2125
2126         format_selector = self.build_format_selector(req_format)
2127
2128         # While in format selection we may need to have an access to the original
2129         # format set in order to calculate some metrics or do some processing.
2130         # For now we need to be able to guess whether original formats provided
2131         # by extractor are incomplete or not (i.e. whether extractor provides only
2132         # video-only or audio-only formats) for proper formats selection for
2133         # extractors with such incomplete formats (see
2134         # https://github.com/ytdl-org/youtube-dl/pull/5556).
2135         # Since formats may be filtered during format selection and may not match
2136         # the original formats the results may be incorrect. Thus original formats
2137         # or pre-calculated metrics should be passed to format selection routines
2138         # as well.
2139         # We will pass a context object containing all necessary additional data
2140         # instead of just formats.
2141         # This fixes incorrect format selection issue (see
2142         # https://github.com/ytdl-org/youtube-dl/issues/10083).
2143         incomplete_formats = (
2144             # All formats are video-only or
2145             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2146             # all formats are audio-only
2147             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2148
2149         ctx = {
2150             'formats': formats,
2151             'incomplete_formats': incomplete_formats,
2152         }
2153
2154         formats_to_download = list(format_selector(ctx))
2155         if not formats_to_download:
2156             if not self.params.get('ignore_no_formats_error'):
2157                 raise ExtractorError('Requested format is not available', expected=True)
2158             else:
2159                 self.report_warning('Requested format is not available')
2160         elif download:
2161             self.to_screen(
2162                 '[info] %s: Downloading %d format(s): %s' % (
2163                     info_dict['id'], len(formats_to_download),
2164                     ", ".join([f['format_id'] for f in formats_to_download])))
2165             for fmt in formats_to_download:
2166                 new_info = dict(info_dict)
2167                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2168                 new_info['__original_infodict'] = info_dict
2169                 new_info.update(fmt)
2170                 self.process_info(new_info)
2171         # We update the info dict with the best quality format (backwards compatibility)
2172         if formats_to_download:
2173             info_dict.update(formats_to_download[-1])
2174         return info_dict
2175
2176     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2177         """Select the requested subtitles and their format"""
2178         available_subs = {}
2179         if normal_subtitles and self.params.get('writesubtitles'):
2180             available_subs.update(normal_subtitles)
2181         if automatic_captions and self.params.get('writeautomaticsub'):
2182             for lang, cap_info in automatic_captions.items():
2183                 if lang not in available_subs:
2184                     available_subs[lang] = cap_info
2185
2186         if (not self.params.get('writesubtitles') and not
2187                 self.params.get('writeautomaticsub') or not
2188                 available_subs):
2189             return None
2190
2191         all_sub_langs = available_subs.keys()
2192         if self.params.get('allsubtitles', False):
2193             requested_langs = all_sub_langs
2194         elif self.params.get('subtitleslangs', False):
2195             requested_langs = set()
2196             for lang in self.params.get('subtitleslangs'):
2197                 if lang == 'all':
2198                     requested_langs.update(all_sub_langs)
2199                     continue
2200                 discard = lang[0] == '-'
2201                 if discard:
2202                     lang = lang[1:]
2203                 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2204                 if discard:
2205                     for lang in current_langs:
2206                         requested_langs.discard(lang)
2207                 else:
2208                     requested_langs.update(current_langs)
2209         elif 'en' in available_subs:
2210             requested_langs = ['en']
2211         else:
2212             requested_langs = [list(all_sub_langs)[0]]
2213         self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2214
2215         formats_query = self.params.get('subtitlesformat', 'best')
2216         formats_preference = formats_query.split('/') if formats_query else []
2217         subs = {}
2218         for lang in requested_langs:
2219             formats = available_subs.get(lang)
2220             if formats is None:
2221                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2222                 continue
2223             for ext in formats_preference:
2224                 if ext == 'best':
2225                     f = formats[-1]
2226                     break
2227                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2228                 if matches:
2229                     f = matches[-1]
2230                     break
2231             else:
2232                 f = formats[-1]
2233                 self.report_warning(
2234                     'No subtitle format found matching "%s" for language %s, '
2235                     'using %s' % (formats_query, lang, f['ext']))
2236             subs[lang] = f
2237         return subs
2238
2239     def __forced_printings(self, info_dict, filename, incomplete):
2240         def print_mandatory(field, actual_field=None):
2241             if actual_field is None:
2242                 actual_field = field
2243             if (self.params.get('force%s' % field, False)
2244                     and (not incomplete or info_dict.get(actual_field) is not None)):
2245                 self.to_stdout(info_dict[actual_field])
2246
2247         def print_optional(field):
2248             if (self.params.get('force%s' % field, False)
2249                     and info_dict.get(field) is not None):
2250                 self.to_stdout(info_dict[field])
2251
2252         info_dict = info_dict.copy()
2253         if filename is not None:
2254             info_dict['filename'] = filename
2255         if info_dict.get('requested_formats') is not None:
2256             # For RTMP URLs, also include the playpath
2257             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2258         elif 'url' in info_dict:
2259             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2260
2261         for tmpl in self.params.get('forceprint', []):
2262             if re.match(r'\w+$', tmpl):
2263                 tmpl = '%({})s'.format(tmpl)
2264             tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2265             self.to_stdout(tmpl % info_copy)
2266
2267         print_mandatory('title')
2268         print_mandatory('id')
2269         print_mandatory('url', 'urls')
2270         print_optional('thumbnail')
2271         print_optional('description')
2272         print_optional('filename')
2273         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2274             self.to_stdout(formatSeconds(info_dict['duration']))
2275         print_mandatory('format')
2276
2277         if self.params.get('forcejson', False):
2278             self.post_extract(info_dict)
2279             self.to_stdout(json.dumps(info_dict, default=repr))
2280
2281     def dl(self, name, info, subtitle=False, test=False):
2282
2283         if test:
2284             verbose = self.params.get('verbose')
2285             params = {
2286                 'test': True,
2287                 'quiet': not verbose,
2288                 'verbose': verbose,
2289                 'noprogress': not verbose,
2290                 'nopart': True,
2291                 'skip_unavailable_fragments': False,
2292                 'keep_fragments': False,
2293                 'overwrites': True,
2294                 '_no_ytdl_file': True,
2295             }
2296         else:
2297             params = self.params
2298         fd = get_suitable_downloader(info, params)(self, params)
2299         if not test:
2300             for ph in self._progress_hooks:
2301                 fd.add_progress_hook(ph)
2302             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2303             self.write_debug('Invoking downloader on "%s"' % urls)
2304         new_info = dict(info)
2305         if new_info.get('http_headers') is None:
2306             new_info['http_headers'] = self._calc_headers(new_info)
2307         return fd.download(name, new_info, subtitle)
2308
2309     def process_info(self, info_dict):
2310         """Process a single resolved IE result."""
2311
2312         assert info_dict.get('_type', 'video') == 'video'
2313
2314         info_dict.setdefault('__postprocessors', [])
2315
2316         max_downloads = self.params.get('max_downloads')
2317         if max_downloads is not None:
2318             if self._num_downloads >= int(max_downloads):
2319                 raise MaxDownloadsReached()
2320
2321         # TODO: backward compatibility, to be removed
2322         info_dict['fulltitle'] = info_dict['title']
2323
2324         if 'format' not in info_dict:
2325             info_dict['format'] = info_dict['ext']
2326
2327         if self._match_entry(info_dict) is not None:
2328             return
2329
2330         self.post_extract(info_dict)
2331         self._num_downloads += 1
2332
2333         # info_dict['_filename'] needs to be set for backward compatibility
2334         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2335         temp_filename = self.prepare_filename(info_dict, 'temp')
2336         files_to_move = {}
2337
2338         # Forced printings
2339         self.__forced_printings(info_dict, full_filename, incomplete=False)
2340
2341         if self.params.get('simulate', False):
2342             if self.params.get('force_write_download_archive', False):
2343                 self.record_download_archive(info_dict)
2344
2345             # Do nothing else if in simulate mode
2346             return
2347
2348         if full_filename is None:
2349             return
2350
2351         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2352             return
2353         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2354             return
2355
2356         if self.params.get('writedescription', False):
2357             descfn = self.prepare_filename(info_dict, 'description')
2358             if not self._ensure_dir_exists(encodeFilename(descfn)):
2359                 return
2360             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2361                 self.to_screen('[info] Video description is already present')
2362             elif info_dict.get('description') is None:
2363                 self.report_warning('There\'s no description to write.')
2364             else:
2365                 try:
2366                     self.to_screen('[info] Writing video description to: ' + descfn)
2367                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2368                         descfile.write(info_dict['description'])
2369                 except (OSError, IOError):
2370                     self.report_error('Cannot write description file ' + descfn)
2371                     return
2372
2373         if self.params.get('writeannotations', False):
2374             annofn = self.prepare_filename(info_dict, 'annotation')
2375             if not self._ensure_dir_exists(encodeFilename(annofn)):
2376                 return
2377             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2378                 self.to_screen('[info] Video annotations are already present')
2379             elif not info_dict.get('annotations'):
2380                 self.report_warning('There are no annotations to write.')
2381             else:
2382                 try:
2383                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2384                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2385                         annofile.write(info_dict['annotations'])
2386                 except (KeyError, TypeError):
2387                     self.report_warning('There are no annotations to write.')
2388                 except (OSError, IOError):
2389                     self.report_error('Cannot write annotations file: ' + annofn)
2390                     return
2391
2392         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2393                                        self.params.get('writeautomaticsub')])
2394
2395         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2396             # subtitles download errors are already managed as troubles in relevant IE
2397             # that way it will silently go on when used with unsupporting IE
2398             subtitles = info_dict['requested_subtitles']
2399             # ie = self.get_info_extractor(info_dict['extractor_key'])
2400             for sub_lang, sub_info in subtitles.items():
2401                 sub_format = sub_info['ext']
2402                 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2403                 sub_filename_final = subtitles_filename(
2404                     self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2405                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2406                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2407                     sub_info['filepath'] = sub_filename
2408                     files_to_move[sub_filename] = sub_filename_final
2409                 else:
2410                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2411                     if sub_info.get('data') is not None:
2412                         try:
2413                             # Use newline='' to prevent conversion of newline characters
2414                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2415                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2416                                 subfile.write(sub_info['data'])
2417                             sub_info['filepath'] = sub_filename
2418                             files_to_move[sub_filename] = sub_filename_final
2419                         except (OSError, IOError):
2420                             self.report_error('Cannot write subtitles file ' + sub_filename)
2421                             return
2422                     else:
2423                         try:
2424                             self.dl(sub_filename, sub_info.copy(), subtitle=True)
2425                             sub_info['filepath'] = sub_filename
2426                             files_to_move[sub_filename] = sub_filename_final
2427                         except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2428                             self.report_warning('Unable to download subtitle for "%s": %s' %
2429                                                 (sub_lang, error_to_compat_str(err)))
2430                             continue
2431
2432         if self.params.get('writeinfojson', False):
2433             infofn = self.prepare_filename(info_dict, 'infojson')
2434             if not self._ensure_dir_exists(encodeFilename(infofn)):
2435                 return
2436             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2437                 self.to_screen('[info] Video metadata is already present')
2438             else:
2439                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2440                 try:
2441                     write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2442                 except (OSError, IOError):
2443                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2444                     return
2445             info_dict['__infojson_filename'] = infofn
2446
2447         for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2448             thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2449             thumb_filename = replace_extension(
2450                 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2451             files_to_move[thumb_filename_temp] = thumb_filename
2452
2453         # Write internet shortcut files
2454         url_link = webloc_link = desktop_link = False
2455         if self.params.get('writelink', False):
2456             if sys.platform == "darwin":  # macOS.
2457                 webloc_link = True
2458             elif sys.platform.startswith("linux"):
2459                 desktop_link = True
2460             else:  # if sys.platform in ['win32', 'cygwin']:
2461                 url_link = True
2462         if self.params.get('writeurllink', False):
2463             url_link = True
2464         if self.params.get('writewebloclink', False):
2465             webloc_link = True
2466         if self.params.get('writedesktoplink', False):
2467             desktop_link = True
2468
2469         if url_link or webloc_link or desktop_link:
2470             if 'webpage_url' not in info_dict:
2471                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2472                 return
2473             ascii_url = iri_to_uri(info_dict['webpage_url'])
2474
2475         def _write_link_file(extension, template, newline, embed_filename):
2476             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2477             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2478                 self.to_screen('[info] Internet shortcut is already present')
2479             else:
2480                 try:
2481                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2482                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2483                         template_vars = {'url': ascii_url}
2484                         if embed_filename:
2485                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2486                         linkfile.write(template % template_vars)
2487                 except (OSError, IOError):
2488                     self.report_error('Cannot write internet shortcut ' + linkfn)
2489                     return False
2490             return True
2491
2492         if url_link:
2493             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2494                 return
2495         if webloc_link:
2496             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2497                 return
2498         if desktop_link:
2499             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2500                 return
2501
2502         try:
2503             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2504         except PostProcessingError as err:
2505             self.report_error('Preprocessing: %s' % str(err))
2506             return
2507
2508         must_record_download_archive = False
2509         if self.params.get('skip_download', False):
2510             info_dict['filepath'] = temp_filename
2511             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2512             info_dict['__files_to_move'] = files_to_move
2513             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2514         else:
2515             # Download
2516             try:
2517
2518                 def existing_file(*filepaths):
2519                     ext = info_dict.get('ext')
2520                     final_ext = self.params.get('final_ext', ext)
2521                     existing_files = []
2522                     for file in orderedSet(filepaths):
2523                         if final_ext != ext:
2524                             converted = replace_extension(file, final_ext, ext)
2525                             if os.path.exists(encodeFilename(converted)):
2526                                 existing_files.append(converted)
2527                         if os.path.exists(encodeFilename(file)):
2528                             existing_files.append(file)
2529
2530                     if not existing_files or self.params.get('overwrites', False):
2531                         for file in orderedSet(existing_files):
2532                             self.report_file_delete(file)
2533                             os.remove(encodeFilename(file))
2534                         return None
2535
2536                     self.report_file_already_downloaded(existing_files[0])
2537                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2538                     return existing_files[0]
2539
2540                 success = True
2541                 if info_dict.get('requested_formats') is not None:
2542
2543                     def compatible_formats(formats):
2544                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2545                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2546                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2547                         if len(video_formats) > 2 or len(audio_formats) > 2:
2548                             return False
2549
2550                         # Check extension
2551                         exts = set(format.get('ext') for format in formats)
2552                         COMPATIBLE_EXTS = (
2553                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2554                             set(('webm',)),
2555                         )
2556                         for ext_sets in COMPATIBLE_EXTS:
2557                             if ext_sets.issuperset(exts):
2558                                 return True
2559                         # TODO: Check acodec/vcodec
2560                         return False
2561
2562                     requested_formats = info_dict['requested_formats']
2563                     old_ext = info_dict['ext']
2564                     if self.params.get('merge_output_format') is None:
2565                         if not compatible_formats(requested_formats):
2566                             info_dict['ext'] = 'mkv'
2567                             self.report_warning(
2568                                 'Requested formats are incompatible for merge and will be merged into mkv.')
2569                         if (info_dict['ext'] == 'webm'
2570                                 and self.params.get('writethumbnail', False)
2571                                 and info_dict.get('thumbnails')):
2572                             info_dict['ext'] = 'mkv'
2573                             self.report_warning(
2574                                 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
2575
2576                     def correct_ext(filename):
2577                         filename_real_ext = os.path.splitext(filename)[1][1:]
2578                         filename_wo_ext = (
2579                             os.path.splitext(filename)[0]
2580                             if filename_real_ext == old_ext
2581                             else filename)
2582                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2583
2584                     # Ensure filename always has a correct extension for successful merge
2585                     full_filename = correct_ext(full_filename)
2586                     temp_filename = correct_ext(temp_filename)
2587                     dl_filename = existing_file(full_filename, temp_filename)
2588                     info_dict['__real_download'] = False
2589
2590                     _protocols = set(determine_protocol(f) for f in requested_formats)
2591                     if len(_protocols) == 1:
2592                         info_dict['protocol'] = _protocols.pop()
2593                     directly_mergable = (
2594                         'no-direct-merge' not in self.params.get('compat_opts', [])
2595                         and info_dict.get('protocol') is not None  # All requested formats have same protocol
2596                         and not self.params.get('allow_unplayable_formats')
2597                         and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2598                     if directly_mergable:
2599                         info_dict['url'] = requested_formats[0]['url']
2600                         # Treat it as a single download
2601                         dl_filename = existing_file(full_filename, temp_filename)
2602                         if dl_filename is None:
2603                             success, real_download = self.dl(temp_filename, info_dict)
2604                             info_dict['__real_download'] = real_download
2605                     else:
2606                         downloaded = []
2607                         merger = FFmpegMergerPP(self)
2608                         if self.params.get('allow_unplayable_formats'):
2609                             self.report_warning(
2610                                 'You have requested merging of multiple formats '
2611                                 'while also allowing unplayable formats to be downloaded. '
2612                                 'The formats won\'t be merged to prevent data corruption.')
2613                         elif not merger.available:
2614                             self.report_warning(
2615                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2616                                 'The formats won\'t be merged.')
2617
2618                         if dl_filename is None:
2619                             for f in requested_formats:
2620                                 new_info = dict(info_dict)
2621                                 del new_info['requested_formats']
2622                                 new_info.update(f)
2623                                 fname = prepend_extension(
2624                                     self.prepare_filename(new_info, 'temp'),
2625                                     'f%s' % f['format_id'], new_info['ext'])
2626                                 if not self._ensure_dir_exists(fname):
2627                                     return
2628                                 downloaded.append(fname)
2629                                 partial_success, real_download = self.dl(fname, new_info)
2630                                 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2631                                 success = success and partial_success
2632                             if merger.available and not self.params.get('allow_unplayable_formats'):
2633                                 info_dict['__postprocessors'].append(merger)
2634                                 info_dict['__files_to_merge'] = downloaded
2635                                 # Even if there were no downloads, it is being merged only now
2636                                 info_dict['__real_download'] = True
2637                             else:
2638                                 for file in downloaded:
2639                                     files_to_move[file] = None
2640                 else:
2641                     # Just a single file
2642                     dl_filename = existing_file(full_filename, temp_filename)
2643                     if dl_filename is None:
2644                         success, real_download = self.dl(temp_filename, info_dict)
2645                         info_dict['__real_download'] = real_download
2646
2647                 dl_filename = dl_filename or temp_filename
2648                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2649
2650             except network_exceptions as err:
2651                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2652                 return
2653             except (OSError, IOError) as err:
2654                 raise UnavailableVideoError(err)
2655             except (ContentTooShortError, ) as err:
2656                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2657                 return
2658
2659             if success and full_filename != '-':
2660                 # Fixup content
2661                 fixup_policy = self.params.get('fixup')
2662                 if fixup_policy is None:
2663                     fixup_policy = 'detect_or_warn'
2664
2665                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2666
2667                 stretched_ratio = info_dict.get('stretched_ratio')
2668                 if stretched_ratio is not None and stretched_ratio != 1:
2669                     if fixup_policy == 'warn':
2670                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2671                             info_dict['id'], stretched_ratio))
2672                     elif fixup_policy == 'detect_or_warn':
2673                         stretched_pp = FFmpegFixupStretchedPP(self)
2674                         if stretched_pp.available:
2675                             info_dict['__postprocessors'].append(stretched_pp)
2676                         else:
2677                             self.report_warning(
2678                                 '%s: Non-uniform pixel ratio (%s). %s'
2679                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2680                     else:
2681                         assert fixup_policy in ('ignore', 'never')
2682
2683                 if (info_dict.get('requested_formats') is None
2684                         and info_dict.get('container') == 'm4a_dash'
2685                         and info_dict.get('ext') == 'm4a'):
2686                     if fixup_policy == 'warn':
2687                         self.report_warning(
2688                             '%s: writing DASH m4a. '
2689                             'Only some players support this container.'
2690                             % info_dict['id'])
2691                     elif fixup_policy == 'detect_or_warn':
2692                         fixup_pp = FFmpegFixupM4aPP(self)
2693                         if fixup_pp.available:
2694                             info_dict['__postprocessors'].append(fixup_pp)
2695                         else:
2696                             self.report_warning(
2697                                 '%s: writing DASH m4a. '
2698                                 'Only some players support this container. %s'
2699                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2700                     else:
2701                         assert fixup_policy in ('ignore', 'never')
2702
2703                 if ('protocol' in info_dict
2704                         and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
2705                     if fixup_policy == 'warn':
2706                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2707                             info_dict['id']))
2708                     elif fixup_policy == 'detect_or_warn':
2709                         fixup_pp = FFmpegFixupM3u8PP(self)
2710                         if fixup_pp.available:
2711                             info_dict['__postprocessors'].append(fixup_pp)
2712                         else:
2713                             self.report_warning(
2714                                 '%s: malformed AAC bitstream detected. %s'
2715                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2716                     else:
2717                         assert fixup_policy in ('ignore', 'never')
2718
2719                 try:
2720                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2721                 except PostProcessingError as err:
2722                     self.report_error('Postprocessing: %s' % str(err))
2723                     return
2724                 try:
2725                     for ph in self._post_hooks:
2726                         ph(info_dict['filepath'])
2727                 except Exception as err:
2728                     self.report_error('post hooks: %s' % str(err))
2729                     return
2730                 must_record_download_archive = True
2731
2732         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2733             self.record_download_archive(info_dict)
2734         max_downloads = self.params.get('max_downloads')
2735         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2736             raise MaxDownloadsReached()
2737
2738     def download(self, url_list):
2739         """Download a given list of URLs."""
2740         outtmpl = self.outtmpl_dict['default']
2741         if (len(url_list) > 1
2742                 and outtmpl != '-'
2743                 and '%' not in outtmpl
2744                 and self.params.get('max_downloads') != 1):
2745             raise SameFileError(outtmpl)
2746
2747         for url in url_list:
2748             try:
2749                 # It also downloads the videos
2750                 res = self.extract_info(
2751                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2752             except UnavailableVideoError:
2753                 self.report_error('unable to download video')
2754             except MaxDownloadsReached:
2755                 self.to_screen('[info] Maximum number of downloaded files reached')
2756                 raise
2757             except ExistingVideoReached:
2758                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2759                 raise
2760             except RejectedVideoReached:
2761                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2762                 raise
2763             else:
2764                 if self.params.get('dump_single_json', False):
2765                     self.post_extract(res)
2766                     self.to_stdout(json.dumps(res, default=repr))
2767
2768         return self._download_retcode
2769
2770     def download_with_info_file(self, info_filename):
2771         with contextlib.closing(fileinput.FileInput(
2772                 [info_filename], mode='r',
2773                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2774             # FileInput doesn't have a read method, we can't call json.load
2775             info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2776         try:
2777             self.process_ie_result(info, download=True)
2778         except (DownloadError, EntryNotInPlaylist):
2779             webpage_url = info.get('webpage_url')
2780             if webpage_url is not None:
2781                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2782                 return self.download([webpage_url])
2783             else:
2784                 raise
2785         return self._download_retcode
2786
2787     @staticmethod
2788     def filter_requested_info(info_dict, actually_filter=True):
2789         remove_keys = ['__original_infodict']  # Always remove this since this may contain a copy of the entire dict
2790         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
2791         if actually_filter:
2792             remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2793             empty_values = (None, {}, [], set(), tuple())
2794             reject = lambda k, v: k not in keep_keys and (
2795                 k.startswith('_') or k in remove_keys or v in empty_values)
2796         else:
2797             info_dict['epoch'] = int(time.time())
2798             reject = lambda k, v: k in remove_keys
2799         filter_fn = lambda obj: (
2800             list(map(filter_fn, obj)) if isinstance(obj, (list, tuple, set))
2801             else obj if not isinstance(obj, dict)
2802             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2803         return filter_fn(info_dict)
2804
2805     def run_pp(self, pp, infodict):
2806         files_to_delete = []
2807         if '__files_to_move' not in infodict:
2808             infodict['__files_to_move'] = {}
2809         files_to_delete, infodict = pp.run(infodict)
2810         if not files_to_delete:
2811             return infodict
2812
2813         if self.params.get('keepvideo', False):
2814             for f in files_to_delete:
2815                 infodict['__files_to_move'].setdefault(f, '')
2816         else:
2817             for old_filename in set(files_to_delete):
2818                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2819                 try:
2820                     os.remove(encodeFilename(old_filename))
2821                 except (IOError, OSError):
2822                     self.report_warning('Unable to remove downloaded original file')
2823                 if old_filename in infodict['__files_to_move']:
2824                     del infodict['__files_to_move'][old_filename]
2825         return infodict
2826
2827     @staticmethod
2828     def post_extract(info_dict):
2829         def actual_post_extract(info_dict):
2830             if info_dict.get('_type') in ('playlist', 'multi_video'):
2831                 for video_dict in info_dict.get('entries', {}):
2832                     actual_post_extract(video_dict or {})
2833                 return
2834
2835             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2836             extra = post_extractor().items()
2837             info_dict.update(extra)
2838             info_dict.pop('__post_extractor', None)
2839
2840             original_infodict = info_dict.get('__original_infodict') or {}
2841             original_infodict.update(extra)
2842             original_infodict.pop('__post_extractor', None)
2843
2844         actual_post_extract(info_dict or {})
2845
2846     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2847         info = dict(ie_info)
2848         info['__files_to_move'] = files_to_move or {}
2849         for pp in self._pps[key]:
2850             info = self.run_pp(pp, info)
2851         return info, info.pop('__files_to_move', None)
2852
2853     def post_process(self, filename, ie_info, files_to_move=None):
2854         """Run all the postprocessors on the given file."""
2855         info = dict(ie_info)
2856         info['filepath'] = filename
2857         info['__files_to_move'] = files_to_move or {}
2858
2859         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2860             info = self.run_pp(pp, info)
2861         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2862         del info['__files_to_move']
2863         for pp in self._pps['after_move']:
2864             info = self.run_pp(pp, info)
2865         return info
2866
2867     def _make_archive_id(self, info_dict):
2868         video_id = info_dict.get('id')
2869         if not video_id:
2870             return
2871         # Future-proof against any change in case
2872         # and backwards compatibility with prior versions
2873         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2874         if extractor is None:
2875             url = str_or_none(info_dict.get('url'))
2876             if not url:
2877                 return
2878             # Try to find matching extractor for the URL and take its ie_key
2879             for ie in self._ies:
2880                 if ie.suitable(url):
2881                     extractor = ie.ie_key()
2882                     break
2883             else:
2884                 return
2885         return '%s %s' % (extractor.lower(), video_id)
2886
2887     def in_download_archive(self, info_dict):
2888         fn = self.params.get('download_archive')
2889         if fn is None:
2890             return False
2891
2892         vid_id = self._make_archive_id(info_dict)
2893         if not vid_id:
2894             return False  # Incomplete video information
2895
2896         return vid_id in self.archive
2897
2898     def record_download_archive(self, info_dict):
2899         fn = self.params.get('download_archive')
2900         if fn is None:
2901             return
2902         vid_id = self._make_archive_id(info_dict)
2903         assert vid_id
2904         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2905             archive_file.write(vid_id + '\n')
2906         self.archive.add(vid_id)
2907
2908     @staticmethod
2909     def format_resolution(format, default='unknown'):
2910         if format.get('vcodec') == 'none':
2911             return 'audio only'
2912         if format.get('resolution') is not None:
2913             return format['resolution']
2914         if format.get('width') and format.get('height'):
2915             res = '%dx%d' % (format['width'], format['height'])
2916         elif format.get('height'):
2917             res = '%sp' % format['height']
2918         elif format.get('width'):
2919             res = '%dx?' % format['width']
2920         else:
2921             res = default
2922         return res
2923
2924     def _format_note(self, fdict):
2925         res = ''
2926         if fdict.get('ext') in ['f4f', 'f4m']:
2927             res += '(unsupported) '
2928         if fdict.get('language'):
2929             if res:
2930                 res += ' '
2931             res += '[%s] ' % fdict['language']
2932         if fdict.get('format_note') is not None:
2933             res += fdict['format_note'] + ' '
2934         if fdict.get('tbr') is not None:
2935             res += '%4dk ' % fdict['tbr']
2936         if fdict.get('container') is not None:
2937             if res:
2938                 res += ', '
2939             res += '%s container' % fdict['container']
2940         if (fdict.get('vcodec') is not None
2941                 and fdict.get('vcodec') != 'none'):
2942             if res:
2943                 res += ', '
2944             res += fdict['vcodec']
2945             if fdict.get('vbr') is not None:
2946                 res += '@'
2947         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2948             res += 'video@'
2949         if fdict.get('vbr') is not None:
2950             res += '%4dk' % fdict['vbr']
2951         if fdict.get('fps') is not None:
2952             if res:
2953                 res += ', '
2954             res += '%sfps' % fdict['fps']
2955         if fdict.get('acodec') is not None:
2956             if res:
2957                 res += ', '
2958             if fdict['acodec'] == 'none':
2959                 res += 'video only'
2960             else:
2961                 res += '%-5s' % fdict['acodec']
2962         elif fdict.get('abr') is not None:
2963             if res:
2964                 res += ', '
2965             res += 'audio'
2966         if fdict.get('abr') is not None:
2967             res += '@%3dk' % fdict['abr']
2968         if fdict.get('asr') is not None:
2969             res += ' (%5dHz)' % fdict['asr']
2970         if fdict.get('filesize') is not None:
2971             if res:
2972                 res += ', '
2973             res += format_bytes(fdict['filesize'])
2974         elif fdict.get('filesize_approx') is not None:
2975             if res:
2976                 res += ', '
2977             res += '~' + format_bytes(fdict['filesize_approx'])
2978         return res
2979
2980     def _format_note_table(self, f):
2981         def join_fields(*vargs):
2982             return ', '.join((val for val in vargs if val != ''))
2983
2984         return join_fields(
2985             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2986             format_field(f, 'language', '[%s]'),
2987             format_field(f, 'format_note'),
2988             format_field(f, 'container', ignore=(None, f.get('ext'))),
2989             format_field(f, 'asr', '%5dHz'))
2990
2991     def list_formats(self, info_dict):
2992         formats = info_dict.get('formats', [info_dict])
2993         new_format = (
2994             'list-formats' not in self.params.get('compat_opts', [])
2995             and self.params.get('list_formats_as_table', True) is not False)
2996         if new_format:
2997             table = [
2998                 [
2999                     format_field(f, 'format_id'),
3000                     format_field(f, 'ext'),
3001                     self.format_resolution(f),
3002                     format_field(f, 'fps', '%d'),
3003                     '|',
3004                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3005                     format_field(f, 'tbr', '%4dk'),
3006                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3007                     '|',
3008                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3009                     format_field(f, 'vbr', '%4dk'),
3010                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3011                     format_field(f, 'abr', '%3dk'),
3012                     format_field(f, 'asr', '%5dHz'),
3013                     self._format_note_table(f)]
3014                 for f in formats
3015                 if f.get('preference') is None or f['preference'] >= -1000]
3016             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
3017                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
3018         else:
3019             table = [
3020                 [
3021                     format_field(f, 'format_id'),
3022                     format_field(f, 'ext'),
3023                     self.format_resolution(f),
3024                     self._format_note(f)]
3025                 for f in formats
3026                 if f.get('preference') is None or f['preference'] >= -1000]
3027             header_line = ['format code', 'extension', 'resolution', 'note']
3028
3029         self.to_screen(
3030             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
3031                 header_line,
3032                 table,
3033                 delim=new_format,
3034                 extraGap=(0 if new_format else 1),
3035                 hideEmpty=new_format)))
3036
3037     def list_thumbnails(self, info_dict):
3038         thumbnails = info_dict.get('thumbnails')
3039         if not thumbnails:
3040             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3041             return
3042
3043         self.to_screen(
3044             '[info] Thumbnails for %s:' % info_dict['id'])
3045         self.to_screen(render_table(
3046             ['ID', 'width', 'height', 'URL'],
3047             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3048
3049     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3050         if not subtitles:
3051             self.to_screen('%s has no %s' % (video_id, name))
3052             return
3053         self.to_screen(
3054             'Available %s for %s:' % (name, video_id))
3055
3056         def _row(lang, formats):
3057             exts, names = zip(*((f['ext'], f.get('name', 'unknown')) for f in reversed(formats)))
3058             if len(set(names)) == 1:
3059                 names = [] if names[0] == 'unknown' else names[:1]
3060             return [lang, ', '.join(names), ', '.join(exts)]
3061
3062         self.to_screen(render_table(
3063             ['Language', 'Name', 'Formats'],
3064             [_row(lang, formats) for lang, formats in subtitles.items()],
3065             hideEmpty=True))
3066
3067     def urlopen(self, req):
3068         """ Start an HTTP download """
3069         if isinstance(req, compat_basestring):
3070             req = sanitized_Request(req)
3071         return self._opener.open(req, timeout=self._socket_timeout)
3072
3073     def print_debug_header(self):
3074         if not self.params.get('verbose'):
3075             return
3076
3077         if type('') is not compat_str:
3078             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
3079             self.report_warning(
3080                 'Your Python is broken! Update to a newer and supported version')
3081
3082         stdout_encoding = getattr(
3083             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
3084         encoding_str = (
3085             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3086                 locale.getpreferredencoding(),
3087                 sys.getfilesystemencoding(),
3088                 stdout_encoding,
3089                 self.get_encoding()))
3090         write_string(encoding_str, encoding=None)
3091
3092         source = (
3093             '(exe)' if hasattr(sys, 'frozen')
3094             else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
3095             else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
3096             else '')
3097         self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
3098         if _LAZY_LOADER:
3099             self._write_string('[debug] Lazy loading extractors enabled\n')
3100         if _PLUGIN_CLASSES:
3101             self._write_string(
3102                 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
3103         if self.params.get('compat_opts'):
3104             self._write_string(
3105                 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3106         try:
3107             sp = subprocess.Popen(
3108                 ['git', 'rev-parse', '--short', 'HEAD'],
3109                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3110                 cwd=os.path.dirname(os.path.abspath(__file__)))
3111             out, err = process_communicate_or_kill(sp)
3112             out = out.decode().strip()
3113             if re.match('[0-9a-f]+', out):
3114                 self._write_string('[debug] Git HEAD: %s\n' % out)
3115         except Exception:
3116             try:
3117                 sys.exc_clear()
3118             except Exception:
3119                 pass
3120
3121         def python_implementation():
3122             impl_name = platform.python_implementation()
3123             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3124                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3125             return impl_name
3126
3127         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3128             platform.python_version(),
3129             python_implementation(),
3130             platform.architecture()[0],
3131             platform_name()))
3132
3133         exe_versions = FFmpegPostProcessor.get_versions(self)
3134         exe_versions['rtmpdump'] = rtmpdump_version()
3135         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3136         exe_str = ', '.join(
3137             '%s %s' % (exe, v)
3138             for exe, v in sorted(exe_versions.items())
3139             if v
3140         )
3141         if not exe_str:
3142             exe_str = 'none'
3143         self._write_string('[debug] exe versions: %s\n' % exe_str)
3144
3145         proxy_map = {}
3146         for handler in self._opener.handlers:
3147             if hasattr(handler, 'proxies'):
3148                 proxy_map.update(handler.proxies)
3149         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
3150
3151         if self.params.get('call_home', False):
3152             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3153             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
3154             return
3155             latest_version = self.urlopen(
3156                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3157             if version_tuple(latest_version) > version_tuple(__version__):
3158                 self.report_warning(
3159                     'You are using an outdated version (newest version: %s)! '
3160                     'See https://yt-dl.org/update if you need help updating.' %
3161                     latest_version)
3162
3163     def _setup_opener(self):
3164         timeout_val = self.params.get('socket_timeout')
3165         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3166
3167         opts_cookiefile = self.params.get('cookiefile')
3168         opts_proxy = self.params.get('proxy')
3169
3170         if opts_cookiefile is None:
3171             self.cookiejar = compat_cookiejar.CookieJar()
3172         else:
3173             opts_cookiefile = expand_path(opts_cookiefile)
3174             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
3175             if os.access(opts_cookiefile, os.R_OK):
3176                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
3177
3178         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3179         if opts_proxy is not None:
3180             if opts_proxy == '':
3181                 proxies = {}
3182             else:
3183                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3184         else:
3185             proxies = compat_urllib_request.getproxies()
3186             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3187             if 'http' in proxies and 'https' not in proxies:
3188                 proxies['https'] = proxies['http']
3189         proxy_handler = PerRequestProxyHandler(proxies)
3190
3191         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3192         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3193         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3194         redirect_handler = YoutubeDLRedirectHandler()
3195         data_handler = compat_urllib_request_DataHandler()
3196
3197         # When passing our own FileHandler instance, build_opener won't add the
3198         # default FileHandler and allows us to disable the file protocol, which
3199         # can be used for malicious purposes (see
3200         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3201         file_handler = compat_urllib_request.FileHandler()
3202
3203         def file_open(*args, **kwargs):
3204             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3205         file_handler.file_open = file_open
3206
3207         opener = compat_urllib_request.build_opener(
3208             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3209
3210         # Delete the default user-agent header, which would otherwise apply in
3211         # cases where our custom HTTP handler doesn't come into play
3212         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3213         opener.addheaders = []
3214         self._opener = opener
3215
3216     def encode(self, s):
3217         if isinstance(s, bytes):
3218             return s  # Already encoded
3219
3220         try:
3221             return s.encode(self.get_encoding())
3222         except UnicodeEncodeError as err:
3223             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3224             raise
3225
3226     def get_encoding(self):
3227         encoding = self.params.get('encoding')
3228         if encoding is None:
3229             encoding = preferredencoding()
3230         return encoding
3231
3232     def _write_thumbnails(self, info_dict, filename):  # return the extensions
3233         write_all = self.params.get('write_all_thumbnails', False)
3234         thumbnails = []
3235         if write_all or self.params.get('writethumbnail', False):
3236             thumbnails = info_dict.get('thumbnails') or []
3237         multiple = write_all and len(thumbnails) > 1
3238
3239         ret = []
3240         for t in thumbnails[::1 if write_all else -1]:
3241             thumb_ext = determine_ext(t['url'], 'jpg')
3242             suffix = '%s.' % t['id'] if multiple else ''
3243             thumb_display_id = '%s ' % t['id'] if multiple else ''
3244             thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3245
3246             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3247                 ret.append(suffix + thumb_ext)
3248                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3249                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3250             else:
3251                 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3252                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3253                 try:
3254                     uf = self.urlopen(t['url'])
3255                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3256                         shutil.copyfileobj(uf, thumbf)
3257                     ret.append(suffix + thumb_ext)
3258                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3259                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3260                     t['filepath'] = thumb_filename
3261                 except network_exceptions as err:
3262                     self.report_warning('Unable to download thumbnail "%s": %s' %
3263                                         (t['url'], error_to_compat_str(err)))
3264             if ret and not write_all:
3265                 break
3266         return ret