]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
[cleanup] Refactor updater
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import time
24 import tokenize
25 import traceback
26 import random
27
28 from string import ascii_letters
29 from zipimport import zipimporter
30
31 from .compat import (
32 compat_basestring,
33 compat_cookiejar,
34 compat_get_terminal_size,
35 compat_kwargs,
36 compat_numeric_types,
37 compat_os_name,
38 compat_str,
39 compat_tokenize_tokenize,
40 compat_urllib_error,
41 compat_urllib_request,
42 compat_urllib_request_DataHandler,
43 )
44 from .utils import (
45 age_restricted,
46 args_to_str,
47 ContentTooShortError,
48 date_from_str,
49 DateRange,
50 DEFAULT_OUTTMPL,
51 determine_ext,
52 determine_protocol,
53 DOT_DESKTOP_LINK_TEMPLATE,
54 DOT_URL_LINK_TEMPLATE,
55 DOT_WEBLOC_LINK_TEMPLATE,
56 DownloadError,
57 encode_compat_str,
58 encodeFilename,
59 EntryNotInPlaylist,
60 error_to_compat_str,
61 ExistingVideoReached,
62 expand_path,
63 ExtractorError,
64 float_or_none,
65 format_bytes,
66 format_field,
67 FORMAT_RE,
68 formatSeconds,
69 GeoRestrictedError,
70 int_or_none,
71 iri_to_uri,
72 ISO3166Utils,
73 locked_file,
74 make_dir,
75 make_HTTPS_handler,
76 MaxDownloadsReached,
77 network_exceptions,
78 orderedSet,
79 OUTTMPL_TYPES,
80 PagedList,
81 parse_filesize,
82 PerRequestProxyHandler,
83 platform_name,
84 PostProcessingError,
85 preferredencoding,
86 prepend_extension,
87 process_communicate_or_kill,
88 random_uuidv4,
89 register_socks_protocols,
90 RejectedVideoReached,
91 render_table,
92 replace_extension,
93 SameFileError,
94 sanitize_filename,
95 sanitize_path,
96 sanitize_url,
97 sanitized_Request,
98 std_headers,
99 str_or_none,
100 strftime_or_none,
101 subtitles_filename,
102 to_high_limit_path,
103 traverse_dict,
104 UnavailableVideoError,
105 url_basename,
106 version_tuple,
107 write_json_file,
108 write_string,
109 YoutubeDLCookieJar,
110 YoutubeDLCookieProcessor,
111 YoutubeDLHandler,
112 YoutubeDLRedirectHandler,
113 )
114 from .cache import Cache
115 from .extractor import (
116 gen_extractor_classes,
117 get_info_extractor,
118 _LAZY_LOADER,
119 _PLUGIN_CLASSES
120 )
121 from .extractor.openload import PhantomJSwrapper
122 from .downloader import (
123 get_suitable_downloader,
124 shorten_protocol_name
125 )
126 from .downloader.rtmp import rtmpdump_version
127 from .postprocessor import (
128 FFmpegFixupM3u8PP,
129 FFmpegFixupM4aPP,
130 FFmpegFixupStretchedPP,
131 FFmpegMergerPP,
132 FFmpegPostProcessor,
133 # FFmpegSubtitlesConvertorPP,
134 get_postprocessor,
135 MoveFilesAfterDownloadPP,
136 )
137 from .version import __version__
138
139 if compat_os_name == 'nt':
140 import ctypes
141
142
143 class YoutubeDL(object):
144 """YoutubeDL class.
145
146 YoutubeDL objects are the ones responsible of downloading the
147 actual video file and writing it to disk if the user has requested
148 it, among some other tasks. In most cases there should be one per
149 program. As, given a video URL, the downloader doesn't know how to
150 extract all the needed information, task that InfoExtractors do, it
151 has to pass the URL to one of them.
152
153 For this, YoutubeDL objects have a method that allows
154 InfoExtractors to be registered in a given order. When it is passed
155 a URL, the YoutubeDL object handles it to the first InfoExtractor it
156 finds that reports being able to handle it. The InfoExtractor extracts
157 all the information about the video or videos the URL refers to, and
158 YoutubeDL process the extracted information, possibly using a File
159 Downloader to download the video.
160
161 YoutubeDL objects accept a lot of parameters. In order not to saturate
162 the object constructor with arguments, it receives a dictionary of
163 options instead. These options are available through the params
164 attribute for the InfoExtractors to use. The YoutubeDL also
165 registers itself as the downloader in charge for the InfoExtractors
166 that are added to it, so this is a "mutual registration".
167
168 Available options:
169
170 username: Username for authentication purposes.
171 password: Password for authentication purposes.
172 videopassword: Password for accessing a video.
173 ap_mso: Adobe Pass multiple-system operator identifier.
174 ap_username: Multiple-system operator account username.
175 ap_password: Multiple-system operator account password.
176 usenetrc: Use netrc for authentication instead.
177 verbose: Print additional info to stdout.
178 quiet: Do not print messages to stdout.
179 no_warnings: Do not print out anything for warnings.
180 forceprint: A list of templates to force print
181 forceurl: Force printing final URL. (Deprecated)
182 forcetitle: Force printing title. (Deprecated)
183 forceid: Force printing ID. (Deprecated)
184 forcethumbnail: Force printing thumbnail URL. (Deprecated)
185 forcedescription: Force printing description. (Deprecated)
186 forcefilename: Force printing final filename. (Deprecated)
187 forceduration: Force printing duration. (Deprecated)
188 forcejson: Force printing info_dict as JSON.
189 dump_single_json: Force printing the info_dict of the whole playlist
190 (or video) as a single JSON line.
191 force_write_download_archive: Force writing download archive regardless
192 of 'skip_download' or 'simulate'.
193 simulate: Do not download the video files.
194 format: Video format code. see "FORMAT SELECTION" for more details.
195 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
196     ignore_no_formats_error: Ignore "No video formats" error. Useful for
197 extracting metadata even if the video is not actually
198 available for download (experimental)
199 format_sort: How to sort the video formats. see "Sorting Formats"
200 for more details.
201 format_sort_force: Force the given format_sort. see "Sorting Formats"
202 for more details.
203 allow_multiple_video_streams: Allow multiple video streams to be merged
204 into a single file
205 allow_multiple_audio_streams: Allow multiple audio streams to be merged
206 into a single file
207 paths: Dictionary of output paths. The allowed keys are 'home'
208 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
209 outtmpl: Dictionary of templates for output names. Allowed keys
210 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
211                        A string is also accepted for backward compatibility
212 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
213 restrictfilenames: Do not allow "&" and spaces in file names
214 trim_file_name: Limit length of filename (extension excluded)
215 windowsfilenames: Force the filenames to be windows compatible
216 ignoreerrors: Do not stop on download errors
217 (Default True when running yt-dlp,
218 but False when directly accessing YoutubeDL class)
219 skip_playlist_after_errors: Number of allowed failures until the rest of
220 the playlist is skipped
221 force_generic_extractor: Force downloader to use the generic extractor
222 overwrites: Overwrite all video and metadata files if True,
223 overwrite only non-video files if None
224 and don't overwrite any file if False
225 playliststart: Playlist item to start at.
226 playlistend: Playlist item to end at.
227 playlist_items: Specific indices of playlist to download.
228 playlistreverse: Download playlist items in reverse order.
229 playlistrandom: Download playlist items in random order.
230 matchtitle: Download only matching titles.
231 rejecttitle: Reject downloads for matching titles.
232 logger: Log messages to a logging.Logger instance.
233 logtostderr: Log messages to stderr instead of stdout.
234 writedescription: Write the video description to a .description file
235 writeinfojson: Write the video description to a .info.json file
236 clean_infojson: Remove private fields from the infojson
237 writecomments: Extract video comments. This will not be written to disk
238 unless writeinfojson is also given
239 writeannotations: Write the video annotations to a .annotations.xml file
240 writethumbnail: Write the thumbnail image to a file
241 allow_playlist_files: Whether to write playlists' description, infojson etc
242 also to disk when using the 'write*' options
243 write_all_thumbnails: Write all thumbnail formats to files
244 writelink: Write an internet shortcut file, depending on the
245 current platform (.url/.webloc/.desktop)
246 writeurllink: Write a Windows internet shortcut file (.url)
247 writewebloclink: Write a macOS internet shortcut file (.webloc)
248 writedesktoplink: Write a Linux internet shortcut file (.desktop)
249 writesubtitles: Write the video subtitles to a file
250 writeautomaticsub: Write the automatically generated subtitles to a file
251 allsubtitles: Deprecated - Use subtitlelangs = ['all']
252 Downloads all the subtitles of the video
253 (requires writesubtitles or writeautomaticsub)
254 listsubtitles: Lists all available subtitles for the video
255 subtitlesformat: The format code for subtitles
256 subtitleslangs: List of languages of the subtitles to download (can be regex).
257 The list may contain "all" to refer to all the available
258 subtitles. The language can be prefixed with a "-" to
259 exclude it from the requested languages. Eg: ['all', '-live_chat']
260 keepvideo: Keep the video file after post-processing
261 daterange: A DateRange object, download only if the upload_date is in the range.
262 skip_download: Skip the actual download of the video file
263 cachedir: Location of the cache files in the filesystem.
264 False to disable filesystem cache.
265 noplaylist: Download single video instead of a playlist if in doubt.
266 age_limit: An integer representing the user's age in years.
267 Unsuitable videos for the given age are skipped.
268 min_views: An integer representing the minimum view count the video
269 must have in order to not be skipped.
270 Videos without view count information are always
271 downloaded. None for no limit.
272 max_views: An integer representing the maximum view count.
273 Videos that are more popular than that are not
274 downloaded.
275 Videos without view count information are always
276 downloaded. None for no limit.
277 download_archive: File name of a file where all downloads are recorded.
278 Videos already present in the file are not downloaded
279 again.
280 break_on_existing: Stop the download process after attempting to download a
281 file that is in the archive.
282 break_on_reject: Stop the download process when encountering a video that
283 has been filtered out.
284 cookiefile: File name where cookies should be read from and dumped to
285 nocheckcertificate:Do not verify SSL certificates
286 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
287 At the moment, this is only supported by YouTube.
288 proxy: URL of the proxy server to use
289 geo_verification_proxy: URL of the proxy to use for IP address verification
290 on geo-restricted sites.
291 socket_timeout: Time to wait for unresponsive hosts, in seconds
292 bidi_workaround: Work around buggy terminals without bidirectional text
293                        support, using fribidi
294 debug_printtraffic:Print out sent and received HTTP traffic
295 include_ads: Download ads as well
296 default_search: Prepend this string if an input url is not valid.
297 'auto' for elaborate guessing
298 encoding: Use this encoding instead of the system-specified.
299 extract_flat: Do not resolve URLs, return the immediate result.
300 Pass in 'in_playlist' to only show this behavior for
301 playlist items.
302 postprocessors: A list of dictionaries, each with an entry
303 * key: The name of the postprocessor. See
304 yt_dlp/postprocessor/__init__.py for a list.
305 * when: When to run the postprocessor. Can be one of
306 pre_process|before_dl|post_process|after_move.
307 Assumed to be 'post_process' if not given
308 post_hooks: A list of functions that get called as the final step
309 for each video file, after all postprocessors have been
310 called. The filename will be passed as the only argument.
311 progress_hooks: A list of functions that get called on download
312 progress, with a dictionary with the entries
313 * status: One of "downloading", "error", or "finished".
314 Check this first and ignore unknown values.
315
316 If status is one of "downloading", or "finished", the
317 following properties may also be present:
318 * filename: The final filename (always present)
319 * tmpfilename: The filename we're currently writing to
320 * downloaded_bytes: Bytes on disk
321 * total_bytes: Size of the whole file, None if unknown
322 * total_bytes_estimate: Guess of the eventual file size,
323 None if unavailable.
324 * elapsed: The number of seconds since download started.
325 * eta: The estimated time in seconds, None if unknown
326 * speed: The download speed in bytes/second, None if
327 unknown
328 * fragment_index: The counter of the currently
329 downloaded video fragment.
330 * fragment_count: The number of fragments (= individual
331 files that will be merged)
332
333 Progress hooks are guaranteed to be called at least once
334 (with status "finished") if the download is successful.
335 merge_output_format: Extension to use when merging formats.
336 final_ext: Expected final extension; used to detect when the file was
337 already downloaded and converted. "merge_output_format" is
338 replaced by this extension when given
339 fixup: Automatically correct known faults of the file.
340 One of:
341 - "never": do nothing
342 - "warn": only emit a warning
343 - "detect_or_warn": check whether we can do anything
344 about it, warn otherwise (default)
345 source_address: Client-side IP address to bind to.
346 call_home: Boolean, true iff we are allowed to contact the
347 yt-dlp servers for debugging. (BROKEN)
348 sleep_interval_requests: Number of seconds to sleep between requests
349 during extraction
350 sleep_interval: Number of seconds to sleep before each download when
351 used alone or a lower bound of a range for randomized
352 sleep before each download (minimum possible number
353 of seconds to sleep) when used along with
354 max_sleep_interval.
355 max_sleep_interval:Upper bound of a range for randomized sleep before each
356 download (maximum possible number of seconds to sleep).
357 Must only be used along with sleep_interval.
358 Actual sleep time will be a random float from range
359 [sleep_interval; max_sleep_interval].
360 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
361 listformats: Print an overview of available video formats and exit.
362 list_thumbnails: Print a table of all thumbnails and exit.
363 match_filter: A function that gets called with the info_dict of
364 every video.
365 If it returns a message, the video is ignored.
366 If it returns None, the video is downloaded.
367 match_filter_func in utils.py is one example for this.
368 no_color: Do not emit color codes in output.
369 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
370 HTTP header
371 geo_bypass_country:
372 Two-letter ISO 3166-2 country code that will be used for
373 explicit geographic restriction bypassing via faking
374 X-Forwarded-For HTTP header
375 geo_bypass_ip_block:
376 IP range in CIDR notation that will be used similarly to
377 geo_bypass_country
378
379 The following options determine which downloader is picked:
380 external_downloader: A dictionary of protocol keys and the executable of the
381 external downloader to use for it. The allowed protocols
382 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
383 Set the value to 'native' to use the native downloader
384 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
385 or {'m3u8': 'ffmpeg'} instead.
386 Use the native HLS downloader instead of ffmpeg/avconv
387 if True, otherwise use ffmpeg/avconv if False, otherwise
388 use downloader suggested by extractor if None.
389 compat_opts: Compatibility options. See "Differences in default behavior".
390 Note that only format-sort, format-spec, no-live-chat,
391 no-attach-info-json, playlist-index, list-formats,
392 no-direct-merge, no-youtube-channel-redirect,
393 and no-youtube-unavailable-videos works when used via the API
394
395 The following parameters are not used by YoutubeDL itself, they are used by
396 the downloader (see yt_dlp/downloader/common.py):
397 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
398 noresizebuffer, retries, continuedl, noprogress, consoletitle,
399 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
400 http_chunk_size.
401
402 The following options are used by the post processors:
403 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
404 otherwise prefer ffmpeg. (avconv support is deprecated)
405 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
406 to the binary or its containing directory.
407 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
408 and a list of additional command-line arguments for the
409 postprocessor/executable. The dict can also have "PP+EXE" keys
410 which are used when the given exe is used by the given PP.
411 Use 'default' as the name for arguments to passed to all PP
412
413 The following options are used by the extractors:
414 extractor_retries: Number of times to retry for known errors
415 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
416 hls_split_discontinuity: Split HLS playlists to different formats at
417 discontinuities such as ad breaks (default: False)
418 youtube_include_dash_manifest: If True (default), DASH manifests and related
419 data will be downloaded and processed by extractor.
420 You can reduce network I/O by disabling it if you don't
421 care about DASH. (only for youtube)
422 youtube_include_hls_manifest: If True (default), HLS manifests and related
423 data will be downloaded and processed by extractor.
424 You can reduce network I/O by disabling it if you don't
425 care about HLS. (only for youtube)
426 """
427
428 _NUMERIC_FIELDS = set((
429 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
430 'timestamp', 'upload_year', 'upload_month', 'upload_day',
431 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
432 'average_rating', 'comment_count', 'age_limit',
433 'start_time', 'end_time',
434 'chapter_number', 'season_number', 'episode_number',
435 'track_number', 'disc_number', 'release_year',
436 'playlist_index',
437 ))
438
439 params = None
440 _ies = []
441 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
442 __prepare_filename_warned = False
443 _first_webpage_request = True
444 _download_retcode = None
445 _num_downloads = None
446 _playlist_level = 0
447 _playlist_urls = set()
448 _screen_file = None
449
450 def __init__(self, params=None, auto_init=True):
451 """Create a FileDownloader object with the given options."""
452 if params is None:
453 params = {}
454 self._ies = []
455 self._ies_instances = {}
456 self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
457 self.__prepare_filename_warned = False
458 self._first_webpage_request = True
459 self._post_hooks = []
460 self._progress_hooks = []
461 self._download_retcode = 0
462 self._num_downloads = 0
463 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
464 self._err_file = sys.stderr
465 self.params = {
466 # Default parameters
467 'nocheckcertificate': False,
468 }
469 self.params.update(params)
470 self.cache = Cache(self)
471
472 if sys.version_info < (3, 6):
473 self.report_warning(
474 'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
475 'Update to Python 3.6 or above' % sys.version_info[:2])
476
477 def check_deprecated(param, option, suggestion):
478 if self.params.get(param) is not None:
479 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
480 return True
481 return False
482
483 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
484 if self.params.get('geo_verification_proxy') is None:
485 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
486
487 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
488 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
489 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
490
491 for msg in self.params.get('warnings', []):
492 self.report_warning(msg)
493
494 if self.params.get('final_ext'):
495 if self.params.get('merge_output_format'):
496 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
497 self.params['merge_output_format'] = self.params['final_ext']
498
499 if 'overwrites' in self.params and self.params['overwrites'] is None:
500 del self.params['overwrites']
501
502 if params.get('bidi_workaround', False):
503 try:
504 import pty
505 master, slave = pty.openpty()
506 width = compat_get_terminal_size().columns
507 if width is None:
508 width_args = []
509 else:
510 width_args = ['-w', str(width)]
511 sp_kwargs = dict(
512 stdin=subprocess.PIPE,
513 stdout=slave,
514 stderr=self._err_file)
515 try:
516 self._output_process = subprocess.Popen(
517 ['bidiv'] + width_args, **sp_kwargs
518 )
519 except OSError:
520 self._output_process = subprocess.Popen(
521 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
522 self._output_channel = os.fdopen(master, 'rb')
523 except OSError as ose:
524 if ose.errno == errno.ENOENT:
525 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
526 else:
527 raise
528
529 if (sys.platform != 'win32'
530 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
531 and not params.get('restrictfilenames', False)):
532 # Unicode filesystem API will throw errors (#1474, #13027)
533 self.report_warning(
534 'Assuming --restrict-filenames since file system encoding '
535 'cannot encode all characters. '
536 'Set the LC_ALL environment variable to fix this.')
537 self.params['restrictfilenames'] = True
538
539 self.outtmpl_dict = self.parse_outtmpl()
540
541 self._setup_opener()
542
543 """Preload the archive, if any is specified"""
544 def preload_download_archive(fn):
545 if fn is None:
546 return False
547 self.write_debug('Loading archive file %r\n' % fn)
548 try:
549 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
550 for line in archive_file:
551 self.archive.add(line.strip())
552 except IOError as ioe:
553 if ioe.errno != errno.ENOENT:
554 raise
555 return False
556 return True
557
558 self.archive = set()
559 preload_download_archive(self.params.get('download_archive'))
560
561 if auto_init:
562 self.print_debug_header()
563 self.add_default_info_extractors()
564
565 for pp_def_raw in self.params.get('postprocessors', []):
566 pp_class = get_postprocessor(pp_def_raw['key'])
567 pp_def = dict(pp_def_raw)
568 del pp_def['key']
569 if 'when' in pp_def:
570 when = pp_def['when']
571 del pp_def['when']
572 else:
573 when = 'post_process'
574 pp = pp_class(self, **compat_kwargs(pp_def))
575 self.add_post_processor(pp, when=when)
576
577 for ph in self.params.get('post_hooks', []):
578 self.add_post_hook(ph)
579
580 for ph in self.params.get('progress_hooks', []):
581 self.add_progress_hook(ph)
582
583 register_socks_protocols()
584
585 def warn_if_short_id(self, argv):
586 # short YouTube ID starting with dash?
587 idxs = [
588 i for i, a in enumerate(argv)
589 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
590 if idxs:
591 correct_argv = (
592 ['yt-dlp']
593 + [a for i, a in enumerate(argv) if i not in idxs]
594 + ['--'] + [argv[i] for i in idxs]
595 )
596 self.report_warning(
597 'Long argument string detected. '
598 'Use -- to separate parameters and URLs, like this:\n%s\n' %
599 args_to_str(correct_argv))
600
601 def add_info_extractor(self, ie):
602 """Add an InfoExtractor object to the end of the list."""
603 self._ies.append(ie)
604 if not isinstance(ie, type):
605 self._ies_instances[ie.ie_key()] = ie
606 ie.set_downloader(self)
607
608 def get_info_extractor(self, ie_key):
609 """
610 Get an instance of an IE with name ie_key, it will try to get one from
611 the _ies list, if there's no instance it will create a new one and add
612 it to the extractor list.
613 """
614 ie = self._ies_instances.get(ie_key)
615 if ie is None:
616 ie = get_info_extractor(ie_key)()
617 self.add_info_extractor(ie)
618 return ie
619
620 def add_default_info_extractors(self):
621 """
622 Add the InfoExtractors returned by gen_extractors to the end of the list
623 """
624 for ie in gen_extractor_classes():
625 self.add_info_extractor(ie)
626
627 def add_post_processor(self, pp, when='post_process'):
628 """Add a PostProcessor object to the end of the chain."""
629 self._pps[when].append(pp)
630 pp.set_downloader(self)
631
632 def add_post_hook(self, ph):
633 """Add the post hook"""
634 self._post_hooks.append(ph)
635
636 def add_progress_hook(self, ph):
637 """Add the progress hook (currently only for the file downloader)"""
638 self._progress_hooks.append(ph)
639
640 def _bidi_workaround(self, message):
641 if not hasattr(self, '_output_channel'):
642 return message
643
644 assert hasattr(self, '_output_process')
645 assert isinstance(message, compat_str)
646 line_count = message.count('\n') + 1
647 self._output_process.stdin.write((message + '\n').encode('utf-8'))
648 self._output_process.stdin.flush()
649 res = ''.join(self._output_channel.readline().decode('utf-8')
650 for _ in range(line_count))
651 return res[:-len('\n')]
652
653 def _write_string(self, s, out=None):
654 write_string(s, out=out, encoding=self.params.get('encoding'))
655
656 def to_stdout(self, message, skip_eol=False, quiet=False):
657 """Print message to stdout"""
658 if self.params.get('logger'):
659 self.params['logger'].debug(message)
660 elif not quiet:
661 message = self._bidi_workaround(message)
662 terminator = ['\n', ''][skip_eol]
663 output = message + terminator
664
665 self._write_string(output, self._screen_file)
666
667 def to_stderr(self, message):
668 """Print message to stderr"""
669 assert isinstance(message, compat_str)
670 if self.params.get('logger'):
671 self.params['logger'].error(message)
672 else:
673 message = self._bidi_workaround(message)
674 output = message + '\n'
675 self._write_string(output, self._err_file)
676
677 def to_console_title(self, message):
678 if not self.params.get('consoletitle', False):
679 return
680 if compat_os_name == 'nt':
681 if ctypes.windll.kernel32.GetConsoleWindow():
682 # c_wchar_p() might not be necessary if `message` is
683 # already of type unicode()
684 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
685 elif 'TERM' in os.environ:
686 self._write_string('\033]0;%s\007' % message, self._screen_file)
687
688 def save_console_title(self):
689 if not self.params.get('consoletitle', False):
690 return
691 if self.params.get('simulate', False):
692 return
693 if compat_os_name != 'nt' and 'TERM' in os.environ:
694 # Save the title on stack
695 self._write_string('\033[22;0t', self._screen_file)
696
697 def restore_console_title(self):
698 if not self.params.get('consoletitle', False):
699 return
700 if self.params.get('simulate', False):
701 return
702 if compat_os_name != 'nt' and 'TERM' in os.environ:
703 # Restore the title from stack
704 self._write_string('\033[23;0t', self._screen_file)
705
706 def __enter__(self):
707 self.save_console_title()
708 return self
709
710 def __exit__(self, *args):
711 self.restore_console_title()
712
713 if self.params.get('cookiefile') is not None:
714 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
715
716 def trouble(self, message=None, tb=None):
717 """Determine action to take when a download problem appears.
718
719 Depending on if the downloader has been configured to ignore
720 download errors or not, this method may throw an exception or
721 not when errors are found, after printing the message.
722
723 tb, if given, is additional traceback information.
724 """
725 if message is not None:
726 self.to_stderr(message)
727 if self.params.get('verbose'):
728 if tb is None:
729 if sys.exc_info()[0]: # if .trouble has been called from an except block
730 tb = ''
731 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
732 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
733 tb += encode_compat_str(traceback.format_exc())
734 else:
735 tb_data = traceback.format_list(traceback.extract_stack())
736 tb = ''.join(tb_data)
737 if tb:
738 self.to_stderr(tb)
739 if not self.params.get('ignoreerrors', False):
740 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
741 exc_info = sys.exc_info()[1].exc_info
742 else:
743 exc_info = sys.exc_info()
744 raise DownloadError(message, exc_info)
745 self._download_retcode = 1
746
747 def to_screen(self, message, skip_eol=False):
748 """Print message to stdout if not in quiet mode"""
749 self.to_stdout(
750 message, skip_eol, quiet=self.params.get('quiet', False))
751
752 def report_warning(self, message):
753 '''
754 Print the message to stderr, it will be prefixed with 'WARNING:'
755 If stderr is a tty file the 'WARNING:' will be colored
756 '''
757 if self.params.get('logger') is not None:
758 self.params['logger'].warning(message)
759 else:
760 if self.params.get('no_warnings'):
761 return
762 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
763 _msg_header = '\033[0;33mWARNING:\033[0m'
764 else:
765 _msg_header = 'WARNING:'
766 warning_message = '%s %s' % (_msg_header, message)
767 self.to_stderr(warning_message)
768
769 def report_error(self, message, tb=None):
770 '''
771 Do the same as trouble, but prefixes the message with 'ERROR:', colored
772 in red if stderr is a tty file.
773 '''
774 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
775 _msg_header = '\033[0;31mERROR:\033[0m'
776 else:
777 _msg_header = 'ERROR:'
778 error_message = '%s %s' % (_msg_header, message)
779 self.trouble(error_message, tb)
780
781 def write_debug(self, message):
782 '''Log debug message or Print message to stderr'''
783 if not self.params.get('verbose', False):
784 return
785 message = '[debug] %s' % message
786 if self.params.get('logger'):
787 self.params['logger'].debug(message)
788 else:
789 self._write_string('%s\n' % message)
790
791 def report_file_already_downloaded(self, file_name):
792 """Report file has already been fully downloaded."""
793 try:
794 self.to_screen('[download] %s has already been downloaded' % file_name)
795 except UnicodeEncodeError:
796 self.to_screen('[download] The file has already been downloaded')
797
798 def report_file_delete(self, file_name):
799 """Report that existing file will be deleted."""
800 try:
801 self.to_screen('Deleting existing file %s' % file_name)
802 except UnicodeEncodeError:
803 self.to_screen('Deleting existing file')
804
805 def parse_outtmpl(self):
806 outtmpl_dict = self.params.get('outtmpl', {})
807 if not isinstance(outtmpl_dict, dict):
808 outtmpl_dict = {'default': outtmpl_dict}
809 outtmpl_dict.update({
810 k: v for k, v in DEFAULT_OUTTMPL.items()
811 if not outtmpl_dict.get(k)})
812 for key, val in outtmpl_dict.items():
813 if isinstance(val, bytes):
814 self.report_warning(
815 'Parameter outtmpl is bytes, but should be a unicode string. '
816 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
817 return outtmpl_dict
818
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)

        Returns the (possibly rewritten) template string and a defaultdict
        whose missing keys resolve to the NA placeholder. `sanitize`, when
        given, is a callable (key, value) -> value applied to string values.
        """
        # Work on a copy so the caller's info_dict is never mutated
        template_dict = dict(info_dict)
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        # duration_string
        template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)

        # epoch
        template_dict['epoch'] = int(time.time())

        # autonumber
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads

        # resolution if not defined
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '%dx?' % template_dict['width']

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(template_dict.get('_last_playlist_index') or '')),
            'autonumber': autonumber_size,
        }
        FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
        mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
        if mobj:
            outtmpl = re.sub(
                FIELD_SIZE_COMPAT_RE,
                r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                outtmpl)

        numeric_fields = list(self._NUMERIC_FIELDS)
        if sanitize is None:
            # Identity sanitizer so the code below can call it unconditionally
            sanitize = lambda k, v: v

        EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P<key>[^)]*)')
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
        # Inner syntax of a %(...)X key: optional negation, field path,
        # optional +/- arithmetic terms, optional >strftime, optional |default
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{0})
            (?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
            (?:>(?P<strf_format>.+?))?
            (?:\|(?P<default>.*?))?
            $'''.format(FIELD_RE))
        MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl):
            final_key = outer_mobj.group('key')
            str_type = outer_mobj.group('type')
            value = None
            mobj = re.match(INTERNAL_FORMAT_RE, final_key)
            if mobj is not None:
                mobj = mobj.groupdict()
                # Object traversal
                fields = mobj['fields'].split('.')
                value = traverse_dict(template_dict, fields)
                # Negative
                if mobj['negate']:
                    value = float_or_none(value)
                    if value is not None:
                        value *= -1
                # Do maths
                if mobj['maths']:
                    value = float_or_none(value)
                    # NOTE: local `operator` shadows the module-level
                    # `operator` import within this loop — intentional here
                    operator = None
                    for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]:
                        if item == '':
                            value = None
                        if value is None:
                            break
                        if operator:
                            item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                            offset = float_or_none(item)
                            if offset is None:
                                # Operand is itself a field path, not a literal
                                offset = float_or_none(traverse_dict(template_dict, item.split('.')))
                            try:
                                value = operator(value, multiplier * offset)
                            except (TypeError, ZeroDivisionError):
                                value = None
                            operator = None
                        else:
                            operator = MATH_FUNCTIONS[item]
                # Datetime formatting
                if mobj['strf_format']:
                    value = strftime_or_none(value, mobj['strf_format'])
                # Set default
                if value is None and mobj['default'] is not None:
                    value = mobj['default']
            # Sanitize
            if str_type in 'crs' and value is not None:  # string
                value = sanitize('%{}'.format(str_type) % fields[-1], value)
            else:  # numeric
                numeric_fields.append(final_key)
                value = float_or_none(value)
            if value is not None:
                template_dict[final_key] = value

        # Missing numeric fields used together with integer presentation types
        # in format specification will break the argument substitution since
        # string NA placeholder is returned for missing fields. We will patch
        # output template for missing fields to meet string presentation type.
        for numeric_field in numeric_fields:
            if template_dict.get(numeric_field) is None:
                outtmpl = re.sub(
                    FORMAT_RE.format(re.escape(numeric_field)),
                    r'%({0})s'.format(numeric_field), outtmpl)

        # Missing keys fall back to the NA placeholder; numeric values are
        # passed through unchanged, everything else goes through sanitize()
        template_dict = collections.defaultdict(lambda: na, (
            (k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
            for k, v in template_dict.items() if v is not None))
        return outtmpl, template_dict
947
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        """Expand the output template of the given type against info_dict.

        Returns the expanded filename, or None (after report_error) if the
        template is invalid.
        """
        try:
            # Sanitizer used for all string fields; id-like keys get the
            # less aggressive is_id treatment
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
            outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # Some template types (see OUTTMPL_TYPES) force a fixed extension
            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if force_ext is not None:
                filename = replace_extension(filename, force_ext, template_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                # NOTE(review): rsplit('.') splits on *every* dot, so only the
                # first dot-free segment is trimmed — confirm this is intended
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
988
989 def prepare_filename(self, info_dict, dir_type='', warn=False):
990 """Generate the output filename."""
991 paths = self.params.get('paths', {})
992 assert isinstance(paths, dict)
993 filename = self._prepare_filename(info_dict, dir_type or 'default')
994
995 if warn and not self.__prepare_filename_warned:
996 if not paths:
997 pass
998 elif filename == '-':
999 self.report_warning('--paths is ignored when an outputting to stdout')
1000 elif os.path.isabs(filename):
1001 self.report_warning('--paths is ignored since an absolute path is given in output template')
1002 self.__prepare_filename_warned = True
1003 if filename == '-' or not filename:
1004 return filename
1005
1006 homepath = expand_path(paths.get('home', '').strip())
1007 assert isinstance(homepath, compat_str)
1008 subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
1009 assert isinstance(subdir, compat_str)
1010 path = os.path.join(homepath, subdir, filename)
1011
1012 # Temporary fix for #4787
1013 # 'Treat' all problem characters by passing filename through preferredencoding
1014 # to workaround encoding issues with subprocess on python2 @ Windows
1015 if sys.version_info < (3, 0) and sys.platform == 'win32':
1016 path = encodeFilename(path, True).decode(preferredencoding())
1017 return sanitize_path(path, force=self.params.get('windowsfilenames'))
1018
1019 def _match_entry(self, info_dict, incomplete):
1020 """ Returns None if the file should be downloaded """
1021
1022 def check_filter():
1023 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1024 if 'title' in info_dict:
1025 # This can happen when we're just evaluating the playlist
1026 title = info_dict['title']
1027 matchtitle = self.params.get('matchtitle', False)
1028 if matchtitle:
1029 if not re.search(matchtitle, title, re.IGNORECASE):
1030 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1031 rejecttitle = self.params.get('rejecttitle', False)
1032 if rejecttitle:
1033 if re.search(rejecttitle, title, re.IGNORECASE):
1034 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1035 date = info_dict.get('upload_date')
1036 if date is not None:
1037 dateRange = self.params.get('daterange', DateRange())
1038 if date not in dateRange:
1039 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1040 view_count = info_dict.get('view_count')
1041 if view_count is not None:
1042 min_views = self.params.get('min_views')
1043 if min_views is not None and view_count < min_views:
1044 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1045 max_views = self.params.get('max_views')
1046 if max_views is not None and view_count > max_views:
1047 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1048 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1049 return 'Skipping "%s" because it is age restricted' % video_title
1050 if self.in_download_archive(info_dict):
1051 return '%s has already been recorded in archive' % video_title
1052
1053 if not incomplete:
1054 match_filter = self.params.get('match_filter')
1055 if match_filter is not None:
1056 ret = match_filter(info_dict)
1057 if ret is not None:
1058 return ret
1059 return None
1060
1061 reason = check_filter()
1062 if reason is not None:
1063 self.to_screen('[download] ' + reason)
1064 if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
1065 raise ExistingVideoReached()
1066 elif self.params.get('break_on_reject', False):
1067 raise RejectedVideoReached()
1068 return reason
1069
1070 @staticmethod
1071 def add_extra_info(info_dict, extra_info):
1072 '''Set the keys from extra_info in info dict if they are missing'''
1073 for key, value in extra_info.items():
1074 info_dict.setdefault(key, value)
1075
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
            must be True for download to work.
        force_generic_extractor -- force using the generic extractor

        NOTE: extra_info uses a mutable default; it is only read here, but
        callers must not rely on mutating it.
        """

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        # First suitable extractor wins; the for/else below reports failure
        # only when NO extractor matched
        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            # Try to cheaply determine the video id so the download archive
            # can be consulted before running the full (expensive) extraction
            try:
                temp_id = str_or_none(
                    ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                    else ie._match_id(url))
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                # break (not return) so the for/else error is also skipped
                break
            return self.__extract_info(url, ie, download, extra_info, process)
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1124
    def __handle_extraction_exceptions(func):
        # Decorator for extraction entry points: converts expected extraction
        # failures into report_error calls (returning None) while letting
        # download-control exceptions propagate.
        # NOTE(review): handler order matters — GeoRestrictedError appears to
        # be a more specific ExtractorError and must stay first; do not reorder.
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
                # Flow-control exceptions must always reach the caller
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
1146
1147 @__handle_extraction_exceptions
1148 def __extract_info(self, url, ie, download, extra_info, process):
1149 ie_result = ie.extract(url)
1150 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1151 return
1152 if isinstance(ie_result, list):
1153 # Backwards compatibility: old IE result format
1154 ie_result = {
1155 '_type': 'compat_list',
1156 'entries': ie_result,
1157 }
1158 self.add_default_extra_info(ie_result, ie, url)
1159 if process:
1160 return self.process_ie_result(ie_result, download, extra_info)
1161 else:
1162 return ie_result
1163
1164 def add_default_extra_info(self, ie_result, ie, url):
1165 self.add_extra_info(ie_result, {
1166 'extractor': ie.IE_NAME,
1167 'webpage_url': url,
1168 'webpage_url_basename': url_basename(url),
1169 'extractor_key': ie.ie_key(),
1170 })
1171
1172 def process_ie_result(self, ie_result, download=True, extra_info={}):
1173 """
1174 Take the result of the ie(may be modified) and resolve all unresolved
1175 references (URLs, playlist items).
1176
1177 It will also download the videos if 'download'.
1178 Returns the resolved ie_result.
1179 """
1180 result_type = ie_result.get('_type', 'video')
1181
1182 if result_type in ('url', 'url_transparent'):
1183 ie_result['url'] = sanitize_url(ie_result['url'])
1184 extract_flat = self.params.get('extract_flat', False)
1185 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1186 or extract_flat is True):
1187 self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
1188 return ie_result
1189
1190 if result_type == 'video':
1191 self.add_extra_info(ie_result, extra_info)
1192 ie_result = self.process_video_result(ie_result, download=download)
1193 additional_urls = (ie_result or {}).get('additional_urls')
1194 if additional_urls:
1195 # TODO: Improve MetadataFromFieldPP to allow setting a list
1196 if isinstance(additional_urls, compat_str):
1197 additional_urls = [additional_urls]
1198 self.to_screen(
1199 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1200 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1201 ie_result['additional_entries'] = [
1202 self.extract_info(
1203 url, download, extra_info,
1204 force_generic_extractor=self.params.get('force_generic_extractor'))
1205 for url in additional_urls
1206 ]
1207 return ie_result
1208 elif result_type == 'url':
1209 # We have to add extra_info to the results because it may be
1210 # contained in a playlist
1211 return self.extract_info(
1212 ie_result['url'], download,
1213 ie_key=ie_result.get('ie_key'),
1214 extra_info=extra_info)
1215 elif result_type == 'url_transparent':
1216 # Use the information from the embedding page
1217 info = self.extract_info(
1218 ie_result['url'], ie_key=ie_result.get('ie_key'),
1219 extra_info=extra_info, download=False, process=False)
1220
1221 # extract_info may return None when ignoreerrors is enabled and
1222 # extraction failed with an error, don't crash and return early
1223 # in this case
1224 if not info:
1225 return info
1226
1227 force_properties = dict(
1228 (k, v) for k, v in ie_result.items() if v is not None)
1229 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1230 if f in force_properties:
1231 del force_properties[f]
1232 new_result = info.copy()
1233 new_result.update(force_properties)
1234
1235 # Extracted info may not be a video result (i.e.
1236 # info.get('_type', 'video') != video) but rather an url or
1237 # url_transparent. In such cases outer metadata (from ie_result)
1238 # should be propagated to inner one (info). For this to happen
1239 # _type of info should be overridden with url_transparent. This
1240 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1241 if new_result.get('_type') == 'url':
1242 new_result['_type'] = 'url_transparent'
1243
1244 return self.process_ie_result(
1245 new_result, download=download, extra_info=extra_info)
1246 elif result_type in ('playlist', 'multi_video'):
1247 # Protect from infinite recursion due to recursively nested playlists
1248 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1249 webpage_url = ie_result['webpage_url']
1250 if webpage_url in self._playlist_urls:
1251 self.to_screen(
1252 '[download] Skipping already downloaded playlist: %s'
1253 % ie_result.get('title') or ie_result.get('id'))
1254 return
1255
1256 self._playlist_level += 1
1257 self._playlist_urls.add(webpage_url)
1258 self._sanitize_thumbnails(ie_result)
1259 try:
1260 return self.__process_playlist(ie_result, download)
1261 finally:
1262 self._playlist_level -= 1
1263 if not self._playlist_level:
1264 self._playlist_urls.clear()
1265 elif result_type == 'compat_list':
1266 self.report_warning(
1267 'Extractor %s returned a compat_list result. '
1268 'It needs to be updated.' % ie_result.get('extractor'))
1269
1270 def _fixup(r):
1271 self.add_extra_info(
1272 r,
1273 {
1274 'extractor': ie_result['extractor'],
1275 'webpage_url': ie_result['webpage_url'],
1276 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1277 'extractor_key': ie_result['extractor_key'],
1278 }
1279 )
1280 return r
1281 ie_result['entries'] = [
1282 self.process_ie_result(_fixup(r), download, extra_info)
1283 for r in ie_result['entries']
1284 ]
1285 return ie_result
1286 else:
1287 raise Exception('Invalid result type: %s' % result_type)
1288
    def _ensure_dir_exists(self, path):
        # Ensure the directory for `path` exists, reporting failures through
        # report_error; returns make_dir's result (see utils.make_dir — the
        # exact return contract lives there)
        return make_dir(path, self.report_error)
1291
1292 def __process_playlist(self, ie_result, download):
1293 # We process each entry in the playlist
1294 playlist = ie_result.get('title') or ie_result.get('id')
1295 self.to_screen('[download] Downloading playlist: %s' % playlist)
1296
1297 if 'entries' not in ie_result:
1298 raise EntryNotInPlaylist()
1299 incomplete_entries = bool(ie_result.get('requested_entries'))
1300 if incomplete_entries:
1301 def fill_missing_entries(entries, indexes):
1302 ret = [None] * max(*indexes)
1303 for i, entry in zip(indexes, entries):
1304 ret[i - 1] = entry
1305 return ret
1306 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1307
1308 playlist_results = []
1309
1310 playliststart = self.params.get('playliststart', 1) - 1
1311 playlistend = self.params.get('playlistend')
1312 # For backwards compatibility, interpret -1 as whole list
1313 if playlistend == -1:
1314 playlistend = None
1315
1316 playlistitems_str = self.params.get('playlist_items')
1317 playlistitems = None
1318 if playlistitems_str is not None:
1319 def iter_playlistitems(format):
1320 for string_segment in format.split(','):
1321 if '-' in string_segment:
1322 start, end = string_segment.split('-')
1323 for item in range(int(start), int(end) + 1):
1324 yield int(item)
1325 else:
1326 yield int(string_segment)
1327 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1328
1329 ie_entries = ie_result['entries']
1330
1331 def make_playlistitems_entries(list_ie_entries):
1332 num_entries = len(list_ie_entries)
1333 for i in playlistitems:
1334 if -num_entries < i <= num_entries:
1335 yield list_ie_entries[i - 1]
1336 elif incomplete_entries:
1337 raise EntryNotInPlaylist()
1338
1339 if isinstance(ie_entries, list):
1340 n_all_entries = len(ie_entries)
1341 if playlistitems:
1342 entries = list(make_playlistitems_entries(ie_entries))
1343 else:
1344 entries = ie_entries[playliststart:playlistend]
1345 n_entries = len(entries)
1346 msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
1347 elif isinstance(ie_entries, PagedList):
1348 if playlistitems:
1349 entries = []
1350 for item in playlistitems:
1351 entries.extend(ie_entries.getslice(
1352 item - 1, item
1353 ))
1354 else:
1355 entries = ie_entries.getslice(
1356 playliststart, playlistend)
1357 n_entries = len(entries)
1358 msg = 'Downloading %d videos' % n_entries
1359 else: # iterable
1360 if playlistitems:
1361 entries = list(make_playlistitems_entries(list(itertools.islice(
1362 ie_entries, 0, max(playlistitems)))))
1363 else:
1364 entries = list(itertools.islice(
1365 ie_entries, playliststart, playlistend))
1366 n_entries = len(entries)
1367 msg = 'Downloading %d videos' % n_entries
1368
1369 if any((entry is None for entry in entries)):
1370 raise EntryNotInPlaylist()
1371 if not playlistitems and (playliststart or playlistend):
1372 playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
1373 ie_result['entries'] = entries
1374 ie_result['requested_entries'] = playlistitems
1375
1376 if self.params.get('allow_playlist_files', True):
1377 ie_copy = {
1378 'playlist': playlist,
1379 'playlist_id': ie_result.get('id'),
1380 'playlist_title': ie_result.get('title'),
1381 'playlist_uploader': ie_result.get('uploader'),
1382 'playlist_uploader_id': ie_result.get('uploader_id'),
1383 'playlist_index': 0,
1384 }
1385 ie_copy.update(dict(ie_result))
1386
1387 if self.params.get('writeinfojson', False):
1388 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1389 if not self._ensure_dir_exists(encodeFilename(infofn)):
1390 return
1391 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1392 self.to_screen('[info] Playlist metadata is already present')
1393 else:
1394 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1395 try:
1396 write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1397 except (OSError, IOError):
1398 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1399
1400 # TODO: This should be passed to ThumbnailsConvertor if necessary
1401 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1402
1403 if self.params.get('writedescription', False):
1404 descfn = self.prepare_filename(ie_copy, 'pl_description')
1405 if not self._ensure_dir_exists(encodeFilename(descfn)):
1406 return
1407 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1408 self.to_screen('[info] Playlist description is already present')
1409 elif ie_result.get('description') is None:
1410 self.report_warning('There\'s no playlist description to write.')
1411 else:
1412 try:
1413 self.to_screen('[info] Writing playlist description to: ' + descfn)
1414 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1415 descfile.write(ie_result['description'])
1416 except (OSError, IOError):
1417 self.report_error('Cannot write playlist description file ' + descfn)
1418 return
1419
1420 # Save playlist_index before re-ordering
1421 entries = [
1422 ((playlistitems[i - 1] if playlistitems else i), entry)
1423 for i, entry in enumerate(entries, 1)]
1424
1425 if self.params.get('playlistreverse', False):
1426 entries = entries[::-1]
1427 if self.params.get('playlistrandom', False):
1428 random.shuffle(entries)
1429
1430 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1431
1432 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
1433 failures = 0
1434 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1435 for i, entry_tuple in enumerate(entries, 1):
1436 playlist_index, entry = entry_tuple
1437 if 'playlist_index' in self.params.get('compat_options', []):
1438 playlist_index = playlistitems[i - 1] if playlistitems else i
1439 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1440 # This __x_forwarded_for_ip thing is a bit ugly but requires
1441 # minimal changes
1442 if x_forwarded_for:
1443 entry['__x_forwarded_for_ip'] = x_forwarded_for
1444 extra = {
1445 'n_entries': n_entries,
1446 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1447 'playlist_index': playlist_index,
1448 'playlist_autonumber': i,
1449 'playlist': playlist,
1450 'playlist_id': ie_result.get('id'),
1451 'playlist_title': ie_result.get('title'),
1452 'playlist_uploader': ie_result.get('uploader'),
1453 'playlist_uploader_id': ie_result.get('uploader_id'),
1454 'extractor': ie_result['extractor'],
1455 'webpage_url': ie_result['webpage_url'],
1456 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1457 'extractor_key': ie_result['extractor_key'],
1458 }
1459
1460 if self._match_entry(entry, incomplete=True) is not None:
1461 continue
1462
1463 entry_result = self.__process_iterable_entry(entry, download, extra)
1464 if not entry_result:
1465 failures += 1
1466 if failures >= max_failures:
1467 self.report_error(
1468 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1469 break
1470 # TODO: skip failed (empty) entries?
1471 playlist_results.append(entry_result)
1472 ie_result['entries'] = playlist_results
1473 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1474 return ie_result
1475
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        # Thin wrapper so per-entry playlist processing gets the standard
        # extraction error handling (errors become report_error + None result)
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1480
    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "
        # Two grammars are tried in order: numeric comparisons on known keys
        # (e.g. 'height>=720'), then string comparisons on arbitrary keys
        # (e.g. 'ext=mp4', 'format_id^=hls'). The returned closure captures
        # `m`, `op` and `comparison_value` from whichever grammar matched.

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            $
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                # Not a plain integer — try parsing as a size ('1.2M', '500Ki', ...)
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)
                \s*(?P<key>[a-zA-Z0-9._-]+)
                \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9._-]+)
                \s*$
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                # '!' prefix negates the string operator
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise ValueError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            # A missing attribute passes only when the '?' suffix was given
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter
1543
1544 def _default_format_spec(self, info_dict, download=True):
1545
1546 def can_merge():
1547 merger = FFmpegMergerPP(self)
1548 return merger.available and merger.can_merge()
1549
1550 prefer_best = (
1551 not self.params.get('simulate', False)
1552 and download
1553 and (
1554 not can_merge()
1555 or info_dict.get('is_live', False)
1556 or self.outtmpl_dict['default'] == '-'))
1557 compat = (
1558 prefer_best
1559 or self.params.get('allow_multiple_audio_streams', False)
1560 or 'format-spec' in self.params.get('compat_opts', []))
1561
1562 return (
1563 'best/bestvideo+bestaudio' if prefer_best
1564 else 'bestvideo*+bestaudio/best' if not compat
1565 else 'bestvideo+bestaudio/best')
1566
1567 def build_format_selector(self, format_spec):
1568 def syntax_error(note, start):
1569 message = (
1570 'Invalid format specification: '
1571 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1572 return SyntaxError(message)
1573
1574 PICKFIRST = 'PICKFIRST'
1575 MERGE = 'MERGE'
1576 SINGLE = 'SINGLE'
1577 GROUP = 'GROUP'
1578 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1579
1580 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1581 'video': self.params.get('allow_multiple_video_streams', False)}
1582
1583 check_formats = self.params.get('check_formats')
1584
1585 def _parse_filter(tokens):
1586 filter_parts = []
1587 for type, string, start, _, _ in tokens:
1588 if type == tokenize.OP and string == ']':
1589 return ''.join(filter_parts)
1590 else:
1591 filter_parts.append(string)
1592
1593 def _remove_unused_ops(tokens):
1594 # Remove operators that we don't use and join them with the surrounding strings
1595 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1596 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1597 last_string, last_start, last_end, last_line = None, None, None, None
1598 for type, string, start, end, line in tokens:
1599 if type == tokenize.OP and string == '[':
1600 if last_string:
1601 yield tokenize.NAME, last_string, last_start, last_end, last_line
1602 last_string = None
1603 yield type, string, start, end, line
1604 # everything inside brackets will be handled by _parse_filter
1605 for type, string, start, end, line in tokens:
1606 yield type, string, start, end, line
1607 if type == tokenize.OP and string == ']':
1608 break
1609 elif type == tokenize.OP and string in ALLOWED_OPS:
1610 if last_string:
1611 yield tokenize.NAME, last_string, last_start, last_end, last_line
1612 last_string = None
1613 yield type, string, start, end, line
1614 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1615 if not last_string:
1616 last_string = string
1617 last_start = start
1618 last_end = end
1619 else:
1620 last_string += string
1621 if last_string:
1622 yield tokenize.NAME, last_string, last_start, last_end, last_line
1623
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser for the format selector grammar.
            # Returns a list of FormatSelector tuples (the top-level ','
            # alternatives). The inside_* flags tell a recursive call which
            # operator of the caller's context terminates this sub-expression,
            # in which case the terminating token is pushed back via
            # tokens.restore_last_token() for the caller to consume.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    # A bare word/number is a single format selector atom
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        # ',' separates independent top-level selections
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        # '/' means "pick the first alternative that yields formats"
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # '[...]' attaches a filter to the preceding selector
                        # (or to 'best' when there is none)
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        # '+' merges two formats (e.g. bestvideo+bestaudio)
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors
1681
1682 def _merge(formats_pair):
1683 format_1, format_2 = formats_pair
1684
1685 formats_info = []
1686 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1687 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1688
1689 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1690 get_no_more = {"video": False, "audio": False}
1691 for (i, fmt_info) in enumerate(formats_info):
1692 for aud_vid in ["audio", "video"]:
1693 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1694 if get_no_more[aud_vid]:
1695 formats_info.pop(i)
1696 get_no_more[aud_vid] = True
1697
1698 if len(formats_info) == 1:
1699 return formats_info[0]
1700
1701 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1702 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1703
1704 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1705 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1706
1707 output_ext = self.params.get('merge_output_format')
1708 if not output_ext:
1709 if the_only_video:
1710 output_ext = the_only_video['ext']
1711 elif the_only_audio and not video_fmts:
1712 output_ext = the_only_audio['ext']
1713 else:
1714 output_ext = 'mkv'
1715
1716 new_dict = {
1717 'requested_formats': formats_info,
1718 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1719 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1720 'ext': output_ext,
1721 }
1722
1723 if the_only_video:
1724 new_dict.update({
1725 'width': the_only_video.get('width'),
1726 'height': the_only_video.get('height'),
1727 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1728 'fps': the_only_video.get('fps'),
1729 'vcodec': the_only_video.get('vcodec'),
1730 'vbr': the_only_video.get('vbr'),
1731 'stretched_ratio': the_only_video.get('stretched_ratio'),
1732 })
1733
1734 if the_only_audio:
1735 new_dict.update({
1736 'acodec': the_only_audio.get('acodec'),
1737 'abr': the_only_audio.get('abr'),
1738 })
1739
1740 return new_dict
1741
1742 def _check_formats(formats):
1743 for f in formats:
1744 self.to_screen('[info] Testing format %s' % f['format_id'])
1745 paths = self.params.get('paths', {})
1746 temp_file = os.path.join(
1747 expand_path(paths.get('home', '').strip()),
1748 expand_path(paths.get('temp', '').strip()),
1749 'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
1750 try:
1751 dl, _ = self.dl(temp_file, f, test=True)
1752 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
1753 dl = False
1754 finally:
1755 if os.path.exists(temp_file):
1756 os.remove(temp_file)
1757 if dl:
1758 yield f
1759 else:
1760 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1761
        def _build_selector_function(selector):
            # Compile a parsed FormatSelector tree into a callable taking a
            # ctx dict ({'formats': ..., 'incomplete_formats': ...}) and
            # yielding/returning the selected format dicts.
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    # Concatenate the results of every comma-separated selector
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # First alternative that yields any formats wins
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if check_formats:
                            formats = _check_formats(formats)
                        for f in formats:
                            yield f
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        # Merge every downloadable format into one, folding
                        # from worst to best so the best ends up on top
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    # best/worst selectors like b, bv*, ba.2, wv ... or a
                    # plain extension/format_id atom
                    format_fallback, format_reverse, format_idx = False, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        # '.n' suffix selects the n-th best/worst match
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else None)  # b*, w*
                    else:
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if not formats:
                            return
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        if format_reverse:
                            matches = matches[::-1]
                        if check_formats:
                            matches = list(itertools.islice(_check_formats(matches), format_idx))
                        n = len(matches)
                        if -n <= format_idx - 1 < n:
                            yield matches[format_idx - 1]

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Cartesian product: every combination of the two sides is
                    # merged; ctx is deep-copied so each side filters its own view
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            # Apply the selector's attached [..] filters before selection
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
1866
1867 stream = io.BytesIO(format_spec.encode('utf-8'))
1868 try:
1869 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1870 except tokenize.TokenError:
1871 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1872
1873 class TokenIterator(object):
1874 def __init__(self, tokens):
1875 self.tokens = tokens
1876 self.counter = 0
1877
1878 def __iter__(self):
1879 return self
1880
1881 def __next__(self):
1882 if self.counter >= len(self.tokens):
1883 raise StopIteration()
1884 value = self.tokens[self.counter]
1885 self.counter += 1
1886 return value
1887
1888 next = __next__
1889
1890 def restore_last_token(self):
1891 self.counter -= 1
1892
1893 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1894 return _build_selector_function(parsed_selector)
1895
1896 def _calc_headers(self, info_dict):
1897 res = std_headers.copy()
1898
1899 add_headers = info_dict.get('http_headers')
1900 if add_headers:
1901 res.update(add_headers)
1902
1903 cookies = self._calc_cookies(info_dict)
1904 if cookies:
1905 res['Cookie'] = cookies
1906
1907 if 'X-Forwarded-For' not in res:
1908 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1909 if x_forwarded_for_ip:
1910 res['X-Forwarded-For'] = x_forwarded_for_ip
1911
1912 return res
1913
1914 def _calc_cookies(self, info_dict):
1915 pr = sanitized_Request(info_dict['url'])
1916 self.cookiejar.add_cookie_header(pr)
1917 return pr.get_header('Cookie')
1918
1919 @staticmethod
1920 def _sanitize_thumbnails(info_dict):
1921 thumbnails = info_dict.get('thumbnails')
1922 if thumbnails is None:
1923 thumbnail = info_dict.get('thumbnail')
1924 if thumbnail:
1925 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1926 if thumbnails:
1927 thumbnails.sort(key=lambda t: (
1928 t.get('preference') if t.get('preference') is not None else -1,
1929 t.get('width') if t.get('width') is not None else -1,
1930 t.get('height') if t.get('height') is not None else -1,
1931 t.get('id') if t.get('id') is not None else '',
1932 t.get('url')))
1933 for i, t in enumerate(thumbnails):
1934 t['url'] = sanitize_url(t['url'])
1935 if t.get('width') and t.get('height'):
1936 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1937 if t.get('id') is None:
1938 t['id'] = '%d' % i
1939
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extracted video result, select the requested
        formats and (when download=True) hand each selected format to
        process_info. Returns the (mutated) info_dict."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # Warn that an extractor returned a field of the wrong type
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce all known numeric fields to numbers, warning on mismatch
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        if self.params.get('list_thumbnails'):
            # Listing mode: print and stop, nothing is downloaded
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # Thumbnails are sorted worst-to-best, so take the last (best) one
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive YYYYMMDD date fields from their timestamp counterparts
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize URLs and fill in missing extensions for all subtitle tracks
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            # Listing mode: print and stop, nothing is downloaded
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('No video formats found!')
            else:
                self.report_warning('No video formats found!')

        def is_wellformed(f):
            # A format without a URL cannot be downloaded; drop it with a warning
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats and formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self.params.get('listformats'):
            # Listing mode: print and stop, nothing is downloaded
            if not info_dict.get('formats'):
                raise ExtractorError('No video formats found', expected=True)
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True)
            else:
                self.report_warning('Requested format is not available')
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2178
2179 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2180 """Select the requested subtitles and their format"""
2181 available_subs = {}
2182 if normal_subtitles and self.params.get('writesubtitles'):
2183 available_subs.update(normal_subtitles)
2184 if automatic_captions and self.params.get('writeautomaticsub'):
2185 for lang, cap_info in automatic_captions.items():
2186 if lang not in available_subs:
2187 available_subs[lang] = cap_info
2188
2189 if (not self.params.get('writesubtitles') and not
2190 self.params.get('writeautomaticsub') or not
2191 available_subs):
2192 return None
2193
2194 all_sub_langs = available_subs.keys()
2195 if self.params.get('allsubtitles', False):
2196 requested_langs = all_sub_langs
2197 elif self.params.get('subtitleslangs', False):
2198 requested_langs = set()
2199 for lang in self.params.get('subtitleslangs'):
2200 if lang == 'all':
2201 requested_langs.update(all_sub_langs)
2202 continue
2203 discard = lang[0] == '-'
2204 if discard:
2205 lang = lang[1:]
2206 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2207 if discard:
2208 for lang in current_langs:
2209 requested_langs.discard(lang)
2210 else:
2211 requested_langs.update(current_langs)
2212 elif 'en' in available_subs:
2213 requested_langs = ['en']
2214 else:
2215 requested_langs = [list(all_sub_langs)[0]]
2216 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2217
2218 formats_query = self.params.get('subtitlesformat', 'best')
2219 formats_preference = formats_query.split('/') if formats_query else []
2220 subs = {}
2221 for lang in requested_langs:
2222 formats = available_subs.get(lang)
2223 if formats is None:
2224 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2225 continue
2226 for ext in formats_preference:
2227 if ext == 'best':
2228 f = formats[-1]
2229 break
2230 matches = list(filter(lambda f: f['ext'] == ext, formats))
2231 if matches:
2232 f = matches[-1]
2233 break
2234 else:
2235 f = formats[-1]
2236 self.report_warning(
2237 'No subtitle format found matching "%s" for language %s, '
2238 'using %s' % (formats_query, lang, f['ext']))
2239 subs[lang] = f
2240 return subs
2241
2242 def __forced_printings(self, info_dict, filename, incomplete):
2243 def print_mandatory(field, actual_field=None):
2244 if actual_field is None:
2245 actual_field = field
2246 if (self.params.get('force%s' % field, False)
2247 and (not incomplete or info_dict.get(actual_field) is not None)):
2248 self.to_stdout(info_dict[actual_field])
2249
2250 def print_optional(field):
2251 if (self.params.get('force%s' % field, False)
2252 and info_dict.get(field) is not None):
2253 self.to_stdout(info_dict[field])
2254
2255 info_dict = info_dict.copy()
2256 if filename is not None:
2257 info_dict['filename'] = filename
2258 if info_dict.get('requested_formats') is not None:
2259 # For RTMP URLs, also include the playpath
2260 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2261 elif 'url' in info_dict:
2262 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2263
2264 for tmpl in self.params.get('forceprint', []):
2265 if re.match(r'\w+$', tmpl):
2266 tmpl = '%({})s'.format(tmpl)
2267 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2268 self.to_stdout(tmpl % info_copy)
2269
2270 print_mandatory('title')
2271 print_mandatory('id')
2272 print_mandatory('url', 'urls')
2273 print_optional('thumbnail')
2274 print_optional('description')
2275 print_optional('filename')
2276 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2277 self.to_stdout(formatSeconds(info_dict['duration']))
2278 print_mandatory('format')
2279
2280 if self.params.get('forcejson', False):
2281 self.post_extract(info_dict)
2282 self.to_stdout(json.dumps(info_dict, default=repr))
2283
2284 def dl(self, name, info, subtitle=False, test=False):
2285
2286 if test:
2287 verbose = self.params.get('verbose')
2288 params = {
2289 'test': True,
2290 'quiet': not verbose,
2291 'verbose': verbose,
2292 'noprogress': not verbose,
2293 'nopart': True,
2294 'skip_unavailable_fragments': False,
2295 'keep_fragments': False,
2296 'overwrites': True,
2297 '_no_ytdl_file': True,
2298 }
2299 else:
2300 params = self.params
2301 fd = get_suitable_downloader(info, params)(self, params)
2302 if not test:
2303 for ph in self._progress_hooks:
2304 fd.add_progress_hook(ph)
2305 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2306 self.write_debug('Invoking downloader on "%s"' % urls)
2307 new_info = dict(info)
2308 if new_info.get('http_headers') is None:
2309 new_info['http_headers'] = self._calc_headers(new_info)
2310 return fd.download(name, new_info, subtitle)
2311
2312 def process_info(self, info_dict):
2313 """Process a single resolved IE result."""
2314
2315 assert info_dict.get('_type', 'video') == 'video'
2316
2317 info_dict.setdefault('__postprocessors', [])
2318
2319 max_downloads = self.params.get('max_downloads')
2320 if max_downloads is not None:
2321 if self._num_downloads >= int(max_downloads):
2322 raise MaxDownloadsReached()
2323
2324 # TODO: backward compatibility, to be removed
2325 info_dict['fulltitle'] = info_dict['title']
2326
2327 if 'format' not in info_dict:
2328 info_dict['format'] = info_dict['ext']
2329
2330 if self._match_entry(info_dict, incomplete=False) is not None:
2331 return
2332
2333 self.post_extract(info_dict)
2334 self._num_downloads += 1
2335
2336 # info_dict['_filename'] needs to be set for backward compatibility
2337 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2338 temp_filename = self.prepare_filename(info_dict, 'temp')
2339 files_to_move = {}
2340
2341 # Forced printings
2342 self.__forced_printings(info_dict, full_filename, incomplete=False)
2343
2344 if self.params.get('simulate', False):
2345 if self.params.get('force_write_download_archive', False):
2346 self.record_download_archive(info_dict)
2347
2348 # Do nothing else if in simulate mode
2349 return
2350
2351 if full_filename is None:
2352 return
2353
2354 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2355 return
2356 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2357 return
2358
2359 if self.params.get('writedescription', False):
2360 descfn = self.prepare_filename(info_dict, 'description')
2361 if not self._ensure_dir_exists(encodeFilename(descfn)):
2362 return
2363 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2364 self.to_screen('[info] Video description is already present')
2365 elif info_dict.get('description') is None:
2366 self.report_warning('There\'s no description to write.')
2367 else:
2368 try:
2369 self.to_screen('[info] Writing video description to: ' + descfn)
2370 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2371 descfile.write(info_dict['description'])
2372 except (OSError, IOError):
2373 self.report_error('Cannot write description file ' + descfn)
2374 return
2375
2376 if self.params.get('writeannotations', False):
2377 annofn = self.prepare_filename(info_dict, 'annotation')
2378 if not self._ensure_dir_exists(encodeFilename(annofn)):
2379 return
2380 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2381 self.to_screen('[info] Video annotations are already present')
2382 elif not info_dict.get('annotations'):
2383 self.report_warning('There are no annotations to write.')
2384 else:
2385 try:
2386 self.to_screen('[info] Writing video annotations to: ' + annofn)
2387 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2388 annofile.write(info_dict['annotations'])
2389 except (KeyError, TypeError):
2390 self.report_warning('There are no annotations to write.')
2391 except (OSError, IOError):
2392 self.report_error('Cannot write annotations file: ' + annofn)
2393 return
2394
2395 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2396 self.params.get('writeautomaticsub')])
2397
2398 if subtitles_are_requested and info_dict.get('requested_subtitles'):
2399 # subtitles download errors are already managed as troubles in relevant IE
2400 # that way it will silently go on when used with unsupporting IE
2401 subtitles = info_dict['requested_subtitles']
2402 # ie = self.get_info_extractor(info_dict['extractor_key'])
2403 for sub_lang, sub_info in subtitles.items():
2404 sub_format = sub_info['ext']
2405 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2406 sub_filename_final = subtitles_filename(
2407 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2408 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2409 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2410 sub_info['filepath'] = sub_filename
2411 files_to_move[sub_filename] = sub_filename_final
2412 else:
2413 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2414 if sub_info.get('data') is not None:
2415 try:
2416 # Use newline='' to prevent conversion of newline characters
2417 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2418 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2419 subfile.write(sub_info['data'])
2420 sub_info['filepath'] = sub_filename
2421 files_to_move[sub_filename] = sub_filename_final
2422 except (OSError, IOError):
2423 self.report_error('Cannot write subtitles file ' + sub_filename)
2424 return
2425 else:
2426 try:
2427 self.dl(sub_filename, sub_info.copy(), subtitle=True)
2428 sub_info['filepath'] = sub_filename
2429 files_to_move[sub_filename] = sub_filename_final
2430 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2431 self.report_warning('Unable to download subtitle for "%s": %s' %
2432 (sub_lang, error_to_compat_str(err)))
2433 continue
2434
2435 if self.params.get('writeinfojson', False):
2436 infofn = self.prepare_filename(info_dict, 'infojson')
2437 if not self._ensure_dir_exists(encodeFilename(infofn)):
2438 return
2439 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2440 self.to_screen('[info] Video metadata is already present')
2441 else:
2442 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2443 try:
2444 write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2445 except (OSError, IOError):
2446 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2447 return
2448 info_dict['__infojson_filename'] = infofn
2449
2450 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2451 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2452 thumb_filename = replace_extension(
2453 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2454 files_to_move[thumb_filename_temp] = thumb_filename
2455
2456 # Write internet shortcut files
2457 url_link = webloc_link = desktop_link = False
2458 if self.params.get('writelink', False):
2459 if sys.platform == "darwin": # macOS.
2460 webloc_link = True
2461 elif sys.platform.startswith("linux"):
2462 desktop_link = True
2463 else: # if sys.platform in ['win32', 'cygwin']:
2464 url_link = True
2465 if self.params.get('writeurllink', False):
2466 url_link = True
2467 if self.params.get('writewebloclink', False):
2468 webloc_link = True
2469 if self.params.get('writedesktoplink', False):
2470 desktop_link = True
2471
2472 if url_link or webloc_link or desktop_link:
2473 if 'webpage_url' not in info_dict:
2474 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2475 return
2476 ascii_url = iri_to_uri(info_dict['webpage_url'])
2477
2478 def _write_link_file(extension, template, newline, embed_filename):
2479 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2480 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2481 self.to_screen('[info] Internet shortcut is already present')
2482 else:
2483 try:
2484 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2485 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2486 template_vars = {'url': ascii_url}
2487 if embed_filename:
2488 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2489 linkfile.write(template % template_vars)
2490 except (OSError, IOError):
2491 self.report_error('Cannot write internet shortcut ' + linkfn)
2492 return False
2493 return True
2494
2495 if url_link:
2496 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2497 return
2498 if webloc_link:
2499 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2500 return
2501 if desktop_link:
2502 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2503 return
2504
2505 try:
2506 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2507 except PostProcessingError as err:
2508 self.report_error('Preprocessing: %s' % str(err))
2509 return
2510
2511 must_record_download_archive = False
2512 if self.params.get('skip_download', False):
2513 info_dict['filepath'] = temp_filename
2514 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2515 info_dict['__files_to_move'] = files_to_move
2516 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2517 else:
2518 # Download
2519 try:
2520
                def existing_file(*filepaths):
                    """Return an already-downloaded file that can be reused, or None.

                    Each candidate path is checked; if a post-conversion extension
                    is configured ('final_ext'), the converted variant is preferred.
                    When overwriting is enabled, all matches are deleted instead and
                    None is returned so the download proceeds.
                    """
                    ext = info_dict.get('ext')  # closure: info_dict
                    final_ext = self.params.get('final_ext', ext)
                    existing_files = []
                    for file in orderedSet(filepaths):
                        if final_ext != ext:
                            # A postprocessor will convert to final_ext afterwards,
                            # so a previously converted result also counts
                            converted = replace_extension(file, final_ext, ext)
                            if os.path.exists(encodeFilename(converted)):
                                existing_files.append(converted)
                        if os.path.exists(encodeFilename(file)):
                            existing_files.append(file)

                    if not existing_files or self.params.get('overwrites', False):
                        # Nothing reusable, or the user asked for a fresh download:
                        # remove whatever is there and signal "download needed"
                        for file in orderedSet(existing_files):
                            self.report_file_delete(file)
                            os.remove(encodeFilename(file))
                        return None

                    self.report_file_already_downloaded(existing_files[0])
                    # Keep info_dict['ext'] consistent with the file actually reused
                    info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                    return existing_files[0]
2542
2543 success = True
2544 if info_dict.get('requested_formats') is not None:
2545
2546 def compatible_formats(formats):
2547 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2548 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2549 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2550 if len(video_formats) > 2 or len(audio_formats) > 2:
2551 return False
2552
2553 # Check extension
2554 exts = set(format.get('ext') for format in formats)
2555 COMPATIBLE_EXTS = (
2556 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2557 set(('webm',)),
2558 )
2559 for ext_sets in COMPATIBLE_EXTS:
2560 if ext_sets.issuperset(exts):
2561 return True
2562 # TODO: Check acodec/vcodec
2563 return False
2564
2565 requested_formats = info_dict['requested_formats']
2566 old_ext = info_dict['ext']
2567 if self.params.get('merge_output_format') is None:
2568 if not compatible_formats(requested_formats):
2569 info_dict['ext'] = 'mkv'
2570 self.report_warning(
2571 'Requested formats are incompatible for merge and will be merged into mkv.')
2572 if (info_dict['ext'] == 'webm'
2573 and self.params.get('writethumbnail', False)
2574 and info_dict.get('thumbnails')):
2575 info_dict['ext'] = 'mkv'
2576 self.report_warning(
2577 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
2578
                    def correct_ext(filename):
                        # Force the merge container extension onto `filename`:
                        # strip the current extension only if it is the pre-merge
                        # one (closure: old_ext), then append info_dict['ext']
                        filename_real_ext = os.path.splitext(filename)[1][1:]
                        filename_wo_ext = (
                            os.path.splitext(filename)[0]
                            if filename_real_ext == old_ext
                            else filename)
                        return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2586
2587 # Ensure filename always has a correct extension for successful merge
2588 full_filename = correct_ext(full_filename)
2589 temp_filename = correct_ext(temp_filename)
2590 dl_filename = existing_file(full_filename, temp_filename)
2591 info_dict['__real_download'] = False
2592
2593 _protocols = set(determine_protocol(f) for f in requested_formats)
2594 if len(_protocols) == 1:
2595 info_dict['protocol'] = _protocols.pop()
2596 directly_mergable = (
2597 'no-direct-merge' not in self.params.get('compat_opts', [])
2598 and info_dict.get('protocol') is not None # All requested formats have same protocol
2599 and not self.params.get('allow_unplayable_formats')
2600 and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2601 if directly_mergable:
2602 info_dict['url'] = requested_formats[0]['url']
2603 # Treat it as a single download
2604 dl_filename = existing_file(full_filename, temp_filename)
2605 if dl_filename is None:
2606 success, real_download = self.dl(temp_filename, info_dict)
2607 info_dict['__real_download'] = real_download
2608 else:
2609 downloaded = []
2610 merger = FFmpegMergerPP(self)
2611 if self.params.get('allow_unplayable_formats'):
2612 self.report_warning(
2613 'You have requested merging of multiple formats '
2614 'while also allowing unplayable formats to be downloaded. '
2615 'The formats won\'t be merged to prevent data corruption.')
2616 elif not merger.available:
2617 self.report_warning(
2618 'You have requested merging of multiple formats but ffmpeg is not installed. '
2619 'The formats won\'t be merged.')
2620
2621 if dl_filename is None:
2622 for f in requested_formats:
2623 new_info = dict(info_dict)
2624 del new_info['requested_formats']
2625 new_info.update(f)
2626 fname = prepend_extension(
2627 self.prepare_filename(new_info, 'temp'),
2628 'f%s' % f['format_id'], new_info['ext'])
2629 if not self._ensure_dir_exists(fname):
2630 return
2631 downloaded.append(fname)
2632 partial_success, real_download = self.dl(fname, new_info)
2633 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2634 success = success and partial_success
2635 if merger.available and not self.params.get('allow_unplayable_formats'):
2636 info_dict['__postprocessors'].append(merger)
2637 info_dict['__files_to_merge'] = downloaded
2638 # Even if there were no downloads, it is being merged only now
2639 info_dict['__real_download'] = True
2640 else:
2641 for file in downloaded:
2642 files_to_move[file] = None
2643 else:
2644 # Just a single file
2645 dl_filename = existing_file(full_filename, temp_filename)
2646 if dl_filename is None:
2647 success, real_download = self.dl(temp_filename, info_dict)
2648 info_dict['__real_download'] = real_download
2649
2650 dl_filename = dl_filename or temp_filename
2651 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2652
2653 except network_exceptions as err:
2654 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2655 return
2656 except (OSError, IOError) as err:
2657 raise UnavailableVideoError(err)
2658 except (ContentTooShortError, ) as err:
2659 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2660 return
2661
2662 if success and full_filename != '-':
2663 # Fixup content
2664 fixup_policy = self.params.get('fixup')
2665 if fixup_policy is None:
2666 fixup_policy = 'detect_or_warn'
2667
2668 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2669
2670 stretched_ratio = info_dict.get('stretched_ratio')
2671 if stretched_ratio is not None and stretched_ratio != 1:
2672 if fixup_policy == 'warn':
2673 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2674 info_dict['id'], stretched_ratio))
2675 elif fixup_policy == 'detect_or_warn':
2676 stretched_pp = FFmpegFixupStretchedPP(self)
2677 if stretched_pp.available:
2678 info_dict['__postprocessors'].append(stretched_pp)
2679 else:
2680 self.report_warning(
2681 '%s: Non-uniform pixel ratio (%s). %s'
2682 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2683 else:
2684 assert fixup_policy in ('ignore', 'never')
2685
2686 if (info_dict.get('requested_formats') is None
2687 and info_dict.get('container') == 'm4a_dash'
2688 and info_dict.get('ext') == 'm4a'):
2689 if fixup_policy == 'warn':
2690 self.report_warning(
2691 '%s: writing DASH m4a. '
2692 'Only some players support this container.'
2693 % info_dict['id'])
2694 elif fixup_policy == 'detect_or_warn':
2695 fixup_pp = FFmpegFixupM4aPP(self)
2696 if fixup_pp.available:
2697 info_dict['__postprocessors'].append(fixup_pp)
2698 else:
2699 self.report_warning(
2700 '%s: writing DASH m4a. '
2701 'Only some players support this container. %s'
2702 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2703 else:
2704 assert fixup_policy in ('ignore', 'never')
2705
2706 if ('protocol' in info_dict
2707 and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
2708 if fixup_policy == 'warn':
2709 self.report_warning('%s: malformed AAC bitstream detected.' % (
2710 info_dict['id']))
2711 elif fixup_policy == 'detect_or_warn':
2712 fixup_pp = FFmpegFixupM3u8PP(self)
2713 if fixup_pp.available:
2714 info_dict['__postprocessors'].append(fixup_pp)
2715 else:
2716 self.report_warning(
2717 '%s: malformed AAC bitstream detected. %s'
2718 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2719 else:
2720 assert fixup_policy in ('ignore', 'never')
2721
2722 try:
2723 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2724 except PostProcessingError as err:
2725 self.report_error('Postprocessing: %s' % str(err))
2726 return
2727 try:
2728 for ph in self._post_hooks:
2729 ph(info_dict['filepath'])
2730 except Exception as err:
2731 self.report_error('post hooks: %s' % str(err))
2732 return
2733 must_record_download_archive = True
2734
2735 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2736 self.record_download_archive(info_dict)
2737 max_downloads = self.params.get('max_downloads')
2738 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2739 raise MaxDownloadsReached()
2740
2741 def download(self, url_list):
2742 """Download a given list of URLs."""
2743 outtmpl = self.outtmpl_dict['default']
2744 if (len(url_list) > 1
2745 and outtmpl != '-'
2746 and '%' not in outtmpl
2747 and self.params.get('max_downloads') != 1):
2748 raise SameFileError(outtmpl)
2749
2750 for url in url_list:
2751 try:
2752 # It also downloads the videos
2753 res = self.extract_info(
2754 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2755 except UnavailableVideoError:
2756 self.report_error('unable to download video')
2757 except MaxDownloadsReached:
2758 self.to_screen('[info] Maximum number of downloaded files reached')
2759 raise
2760 except ExistingVideoReached:
2761 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2762 raise
2763 except RejectedVideoReached:
2764 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2765 raise
2766 else:
2767 if self.params.get('dump_single_json', False):
2768 self.post_extract(res)
2769 self.to_stdout(json.dumps(res, default=repr))
2770
2771 return self._download_retcode
2772
2773 def download_with_info_file(self, info_filename):
2774 with contextlib.closing(fileinput.FileInput(
2775 [info_filename], mode='r',
2776 openhook=fileinput.hook_encoded('utf-8'))) as f:
2777 # FileInput doesn't have a read method, we can't call json.load
2778 info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2779 try:
2780 self.process_ie_result(info, download=True)
2781 except (DownloadError, EntryNotInPlaylist):
2782 webpage_url = info.get('webpage_url')
2783 if webpage_url is not None:
2784 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2785 return self.download([webpage_url])
2786 else:
2787 raise
2788 return self._download_retcode
2789
2790 @staticmethod
2791 def filter_requested_info(info_dict, actually_filter=True):
2792 info_dict.pop('__original_infodict', None) # Always remove this
2793 if not actually_filter:
2794 info_dict['epoch'] = int(time.time())
2795 return info_dict
2796 exceptions = {
2797 'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
2798 'keep': ['_type'],
2799 }
2800 keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
2801 filter_fn = lambda obj: (
2802 list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
2803 else obj if not isinstance(obj, dict)
2804 else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))
2805 return filter_fn(info_dict)
2806
2807 def run_pp(self, pp, infodict):
2808 files_to_delete = []
2809 if '__files_to_move' not in infodict:
2810 infodict['__files_to_move'] = {}
2811 files_to_delete, infodict = pp.run(infodict)
2812 if not files_to_delete:
2813 return infodict
2814
2815 if self.params.get('keepvideo', False):
2816 for f in files_to_delete:
2817 infodict['__files_to_move'].setdefault(f, '')
2818 else:
2819 for old_filename in set(files_to_delete):
2820 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2821 try:
2822 os.remove(encodeFilename(old_filename))
2823 except (IOError, OSError):
2824 self.report_warning('Unable to remove downloaded original file')
2825 if old_filename in infodict['__files_to_move']:
2826 del infodict['__files_to_move'][old_filename]
2827 return infodict
2828
2829 @staticmethod
2830 def post_extract(info_dict):
2831 def actual_post_extract(info_dict):
2832 if info_dict.get('_type') in ('playlist', 'multi_video'):
2833 for video_dict in info_dict.get('entries', {}):
2834 actual_post_extract(video_dict or {})
2835 return
2836
2837 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2838 extra = post_extractor().items()
2839 info_dict.update(extra)
2840 info_dict.pop('__post_extractor', None)
2841
2842 original_infodict = info_dict.get('__original_infodict') or {}
2843 original_infodict.update(extra)
2844 original_infodict.pop('__post_extractor', None)
2845
2846 actual_post_extract(info_dict or {})
2847
2848 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2849 info = dict(ie_info)
2850 info['__files_to_move'] = files_to_move or {}
2851 for pp in self._pps[key]:
2852 info = self.run_pp(pp, info)
2853 return info, info.pop('__files_to_move', None)
2854
2855 def post_process(self, filename, ie_info, files_to_move=None):
2856 """Run all the postprocessors on the given file."""
2857 info = dict(ie_info)
2858 info['filepath'] = filename
2859 info['__files_to_move'] = files_to_move or {}
2860
2861 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2862 info = self.run_pp(pp, info)
2863 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2864 del info['__files_to_move']
2865 for pp in self._pps['after_move']:
2866 info = self.run_pp(pp, info)
2867 return info
2868
2869 def _make_archive_id(self, info_dict):
2870 video_id = info_dict.get('id')
2871 if not video_id:
2872 return
2873 # Future-proof against any change in case
2874 # and backwards compatibility with prior versions
2875 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2876 if extractor is None:
2877 url = str_or_none(info_dict.get('url'))
2878 if not url:
2879 return
2880 # Try to find matching extractor for the URL and take its ie_key
2881 for ie in self._ies:
2882 if ie.suitable(url):
2883 extractor = ie.ie_key()
2884 break
2885 else:
2886 return
2887 return '%s %s' % (extractor.lower(), video_id)
2888
2889 def in_download_archive(self, info_dict):
2890 fn = self.params.get('download_archive')
2891 if fn is None:
2892 return False
2893
2894 vid_id = self._make_archive_id(info_dict)
2895 if not vid_id:
2896 return False # Incomplete video information
2897
2898 return vid_id in self.archive
2899
2900 def record_download_archive(self, info_dict):
2901 fn = self.params.get('download_archive')
2902 if fn is None:
2903 return
2904 vid_id = self._make_archive_id(info_dict)
2905 assert vid_id
2906 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2907 archive_file.write(vid_id + '\n')
2908 self.archive.add(vid_id)
2909
2910 @staticmethod
2911 def format_resolution(format, default='unknown'):
2912 if format.get('vcodec') == 'none':
2913 return 'audio only'
2914 if format.get('resolution') is not None:
2915 return format['resolution']
2916 if format.get('width') and format.get('height'):
2917 res = '%dx%d' % (format['width'], format['height'])
2918 elif format.get('height'):
2919 res = '%sp' % format['height']
2920 elif format.get('width'):
2921 res = '%dx?' % format['width']
2922 else:
2923 res = default
2924 return res
2925
    def _format_note(self, fdict):
        """Build the free-form 'note' column for the legacy (non-table) format list.

        The string is accumulated piece by piece; separators depend on whether
        anything has been emitted yet, so the order of the checks matters.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # bitrate follows immediately: "codec@NNNNk"
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Video bitrate known but codec unknown
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
2981
2982 def _format_note_table(self, f):
2983 def join_fields(*vargs):
2984 return ', '.join((val for val in vargs if val != ''))
2985
2986 return join_fields(
2987 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2988 format_field(f, 'language', '[%s]'),
2989 format_field(f, 'format_note'),
2990 format_field(f, 'container', ignore=(None, f.get('ext'))),
2991 format_field(f, 'asr', '%5dHz'))
2992
2993 def list_formats(self, info_dict):
2994 formats = info_dict.get('formats', [info_dict])
2995 new_format = (
2996 'list-formats' not in self.params.get('compat_opts', [])
2997 and self.params.get('list_formats_as_table', True) is not False)
2998 if new_format:
2999 table = [
3000 [
3001 format_field(f, 'format_id'),
3002 format_field(f, 'ext'),
3003 self.format_resolution(f),
3004 format_field(f, 'fps', '%d'),
3005 '|',
3006 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3007 format_field(f, 'tbr', '%4dk'),
3008 shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3009 '|',
3010 format_field(f, 'vcodec', default='unknown').replace('none', ''),
3011 format_field(f, 'vbr', '%4dk'),
3012 format_field(f, 'acodec', default='unknown').replace('none', ''),
3013 format_field(f, 'abr', '%3dk'),
3014 format_field(f, 'asr', '%5dHz'),
3015 self._format_note_table(f)]
3016 for f in formats
3017 if f.get('preference') is None or f['preference'] >= -1000]
3018 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
3019 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
3020 else:
3021 table = [
3022 [
3023 format_field(f, 'format_id'),
3024 format_field(f, 'ext'),
3025 self.format_resolution(f),
3026 self._format_note(f)]
3027 for f in formats
3028 if f.get('preference') is None or f['preference'] >= -1000]
3029 header_line = ['format code', 'extension', 'resolution', 'note']
3030
3031 self.to_screen(
3032 '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
3033 header_line,
3034 table,
3035 delim=new_format,
3036 extraGap=(0 if new_format else 1),
3037 hideEmpty=new_format)))
3038
3039 def list_thumbnails(self, info_dict):
3040 thumbnails = info_dict.get('thumbnails')
3041 if not thumbnails:
3042 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3043 return
3044
3045 self.to_screen(
3046 '[info] Thumbnails for %s:' % info_dict['id'])
3047 self.to_screen(render_table(
3048 ['ID', 'width', 'height', 'URL'],
3049 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3050
3051 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3052 if not subtitles:
3053 self.to_screen('%s has no %s' % (video_id, name))
3054 return
3055 self.to_screen(
3056 'Available %s for %s:' % (name, video_id))
3057
3058 def _row(lang, formats):
3059 exts, names = zip(*((f['ext'], f.get('name', 'unknown')) for f in reversed(formats)))
3060 if len(set(names)) == 1:
3061 names = [] if names[0] == 'unknown' else names[:1]
3062 return [lang, ', '.join(names), ', '.join(exts)]
3063
3064 self.to_screen(render_table(
3065 ['Language', 'Name', 'Formats'],
3066 [_row(lang, formats) for lang, formats in subtitles.items()],
3067 hideEmpty=True))
3068
3069 def urlopen(self, req):
3070 """ Start an HTTP download """
3071 if isinstance(req, compat_basestring):
3072 req = sanitized_Request(req)
3073 return self._opener.open(req, timeout=self._socket_timeout)
3074
    def print_debug_header(self):
        """In verbose mode, write debugging info (versions, encodings, proxies) to the log."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        # How yt-dlp is being run: frozen exe, zip bundle, plain source checkout, or installed
        source = (
            '(exe)' if hasattr(sys, 'frozen')
            else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
            else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
            else '')
        self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        if self.params.get('compat_opts'):
            self._write_string(
                '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
        try:
            # Report the git commit when running from a source checkout; failures
            # (no git, not a repo) are silently ignored
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            try:
                # Python 2 only: clear the stored exception state
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # e.g. 'CPython', or 'PyPy version x.y.z' when the detail is available
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            return
            # NOTE(review): unreachable — the youtube-dl update check below was
            # deliberately disabled by the `return` above
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3164
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS, redirects, data: URLs)
        used for all network requests, and store it as self._opener."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 10 minutes
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookies only
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # An explicit empty --proxy disables proxying entirely
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment (http_proxy etc.)
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
3217
3218 def encode(self, s):
3219 if isinstance(s, bytes):
3220 return s # Already encoded
3221
3222 try:
3223 return s.encode(self.get_encoding())
3224 except UnicodeEncodeError as err:
3225 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3226 raise
3227
3228 def get_encoding(self):
3229 encoding = self.params.get('encoding')
3230 if encoding is None:
3231 encoding = preferredencoding()
3232 return encoding
3233
    def _write_thumbnails(self, info_dict, filename):  # return the extensions
        """Download the video thumbnail(s) next to *filename*.

        Returns the list of thumbnail extensions (prefixed with the thumbnail
        id when several are written) that are present on disk afterwards.
        """
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails = []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
        multiple = write_all and len(thumbnails) > 1

        ret = []
        # Extractors sort thumbnails worst-first, so iterate in reverse
        # (best first) unless all of them are wanted
        for t in thumbnails[::1 if write_all else -1]:
            thumb_ext = determine_ext(t['url'], 'jpg')
            suffix = '%s.' % t['id'] if multiple else ''
            thumb_display_id = '%s ' % t['id'] if multiple else ''
            t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                # Reuse the existing file instead of re-downloading
                ret.append(suffix + thumb_ext)
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append(suffix + thumb_ext)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except network_exceptions as err:
                    # Thumbnails are best-effort: warn and carry on
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))
            if ret and not write_all:
                # One (best) thumbnail is enough
                break
        return ret