yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import time
  24 import tokenize
  25 import traceback
  26 import random
  27
  28 from string import ascii_letters
  29 from zipimport import zipimporter
  30
  31 from .compat import (
  32     compat_basestring,
  33     compat_cookiejar,
  34     compat_get_terminal_size,
  35     compat_kwargs,
  36     compat_numeric_types,
  37     compat_os_name,
  38     compat_str,
  39     compat_tokenize_tokenize,
  40     compat_urllib_error,
  41     compat_urllib_request,
  42     compat_urllib_request_DataHandler,
  43 )
  44 from .utils import (
  45     age_restricted,
  46     args_to_str,
  47     ContentTooShortError,
  48     date_from_str,
  49     DateRange,
  50     DEFAULT_OUTTMPL,
  51     determine_ext,
  52     determine_protocol,
  53     DOT_DESKTOP_LINK_TEMPLATE,
  54     DOT_URL_LINK_TEMPLATE,
  55     DOT_WEBLOC_LINK_TEMPLATE,
  56     DownloadError,
  57     encode_compat_str,
  58     encodeFilename,
  59     EntryNotInPlaylist,
  60     error_to_compat_str,
  61     ExistingVideoReached,
  62     expand_path,
  63     ExtractorError,
  64     float_or_none,
  65     format_bytes,
  66     format_field,
  67     FORMAT_RE,
  68     formatSeconds,
  69     GeoRestrictedError,
  70     int_or_none,
  71     iri_to_uri,
  72     ISO3166Utils,
  73     locked_file,
  74     make_dir,
  75     make_HTTPS_handler,
  76     MaxDownloadsReached,
  77     network_exceptions,
  78     orderedSet,
  79     OUTTMPL_TYPES,
  80     PagedList,
  81     parse_filesize,
  82     PerRequestProxyHandler,
  83     platform_name,
  84     PostProcessingError,
  85     preferredencoding,
  86     prepend_extension,
  87     process_communicate_or_kill,
  88     random_uuidv4,
  89     register_socks_protocols,
  90     RejectedVideoReached,
  91     render_table,
  92     replace_extension,
  93     SameFileError,
  94     sanitize_filename,
  95     sanitize_path,
  96     sanitize_url,
  97     sanitized_Request,
  98     std_headers,
  99     str_or_none,
 100     strftime_or_none,
 101     subtitles_filename,
 102     to_high_limit_path,
 103     traverse_dict,
 104     UnavailableVideoError,
 105     url_basename,
 106     version_tuple,
 107     write_json_file,
 108     write_string,
 109     YoutubeDLCookieJar,
 110     YoutubeDLCookieProcessor,
 111     YoutubeDLHandler,
 112     YoutubeDLRedirectHandler,
 113 )
 114 from .cache import Cache
 115 from .extractor import (
 116     gen_extractor_classes,
 117     get_info_extractor,
 118     _LAZY_LOADER,
 119     _PLUGIN_CLASSES
 120 )
 121 from .extractor.openload import PhantomJSwrapper
 122 from .downloader import (
 123     get_suitable_downloader,
 124     shorten_protocol_name
 125 )
 126 from .downloader.rtmp import rtmpdump_version
 127 from .postprocessor import (
 128     FFmpegFixupM3u8PP,
 129     FFmpegFixupM4aPP,
 130     FFmpegFixupStretchedPP,
 131     FFmpegMergerPP,
 132     FFmpegPostProcessor,
 133     # FFmpegSubtitlesConvertorPP,
 134     get_postprocessor,
 135     MoveFilesAfterDownloadPP,
 136 )
 137 from .version import __version__
 138
 139 if compat_os_name == 'nt':
 140     import ctypes
 141
 142
 143 class YoutubeDL(object):
 144     """YoutubeDL class.
 145
  146     YoutubeDL objects are the ones responsible for downloading the
 147     actual video file and writing it to disk if the user has requested
 148     it, among some other tasks. In most cases there should be one per
 149     program. As, given a video URL, the downloader doesn't know how to
 150     extract all the needed information, task that InfoExtractors do, it
 151     has to pass the URL to one of them.
 152
 153     For this, YoutubeDL objects have a method that allows
 154     InfoExtractors to be registered in a given order. When it is passed
  155     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 156     finds that reports being able to handle it. The InfoExtractor extracts
 157     all the information about the video or videos the URL refers to, and
  158     YoutubeDL processes the extracted information, possibly using a File
 159     Downloader to download the video.
 160
 161     YoutubeDL objects accept a lot of parameters. In order not to saturate
 162     the object constructor with arguments, it receives a dictionary of
 163     options instead. These options are available through the params
 164     attribute for the InfoExtractors to use. The YoutubeDL also
 165     registers itself as the downloader in charge for the InfoExtractors
 166     that are added to it, so this is a "mutual registration".
 167
 168     Available options:
 169
 170     username:          Username for authentication purposes.
 171     password:          Password for authentication purposes.
 172     videopassword:     Password for accessing a video.
 173     ap_mso:            Adobe Pass multiple-system operator identifier.
 174     ap_username:       Multiple-system operator account username.
 175     ap_password:       Multiple-system operator account password.
 176     usenetrc:          Use netrc for authentication instead.
 177     verbose:           Print additional info to stdout.
 178     quiet:             Do not print messages to stdout.
 179     no_warnings:       Do not print out anything for warnings.
 180     forceprint:        A list of templates to force print
 181     forceurl:          Force printing final URL. (Deprecated)
 182     forcetitle:        Force printing title. (Deprecated)
 183     forceid:           Force printing ID. (Deprecated)
 184     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 185     forcedescription:  Force printing description. (Deprecated)
 186     forcefilename:     Force printing final filename. (Deprecated)
 187     forceduration:     Force printing duration. (Deprecated)
 188     forcejson:         Force printing info_dict as JSON.
 189     dump_single_json:  Force printing the info_dict of the whole playlist
 190                        (or video) as a single JSON line.
 191     force_write_download_archive: Force writing download archive regardless
 192                        of 'skip_download' or 'simulate'.
 193     simulate:          Do not download the video files.
 194     format:            Video format code. see "FORMAT SELECTION" for more details.
 195     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
  196     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 197                        extracting metadata even if the video is not actually
 198                        available for download (experimental)
 199     format_sort:       How to sort the video formats. see "Sorting Formats"
 200                        for more details.
 201     format_sort_force: Force the given format_sort. see "Sorting Formats"
 202                        for more details.
 203     allow_multiple_video_streams:   Allow multiple video streams to be merged
 204                        into a single file
 205     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 206                        into a single file
 207     paths:             Dictionary of output paths. The allowed keys are 'home'
 208                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 209     outtmpl:           Dictionary of templates for output names. Allowed keys
 210                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
  211                        A string is also accepted for backward compatibility
 212     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 213     restrictfilenames: Do not allow "&" and spaces in file names
 214     trim_file_name:    Limit length of filename (extension excluded)
 215     windowsfilenames:  Force the filenames to be windows compatible
 216     ignoreerrors:      Do not stop on download errors
 217                        (Default True when running yt-dlp,
 218                        but False when directly accessing YoutubeDL class)
 219     skip_playlist_after_errors: Number of allowed failures until the rest of
 220                        the playlist is skipped
 221     force_generic_extractor: Force downloader to use the generic extractor
 222     overwrites:        Overwrite all video and metadata files if True,
 223                        overwrite only non-video files if None
 224                        and don't overwrite any file if False
 225     playliststart:     Playlist item to start at.
 226     playlistend:       Playlist item to end at.
 227     playlist_items:    Specific indices of playlist to download.
 228     playlistreverse:   Download playlist items in reverse order.
 229     playlistrandom:    Download playlist items in random order.
 230     matchtitle:        Download only matching titles.
 231     rejecttitle:       Reject downloads for matching titles.
 232     logger:            Log messages to a logging.Logger instance.
 233     logtostderr:       Log messages to stderr instead of stdout.
 234     writedescription:  Write the video description to a .description file
 235     writeinfojson:     Write the video description to a .info.json file
 236     clean_infojson:    Remove private fields from the infojson
 237     writecomments:     Extract video comments. This will not be written to disk
 238                        unless writeinfojson is also given
 239     writeannotations:  Write the video annotations to a .annotations.xml file
 240     writethumbnail:    Write the thumbnail image to a file
 241     allow_playlist_files: Whether to write playlists' description, infojson etc
 242                        also to disk when using the 'write*' options
 243     write_all_thumbnails:  Write all thumbnail formats to files
 244     writelink:         Write an internet shortcut file, depending on the
 245                        current platform (.url/.webloc/.desktop)
 246     writeurllink:      Write a Windows internet shortcut file (.url)
 247     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 248     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 249     writesubtitles:    Write the video subtitles to a file
 250     writeautomaticsub: Write the automatically generated subtitles to a file
  251     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 252                        Downloads all the subtitles of the video
 253                        (requires writesubtitles or writeautomaticsub)
 254     listsubtitles:     Lists all available subtitles for the video
 255     subtitlesformat:   The format code for subtitles
 256     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 257                        The list may contain "all" to refer to all the available
 258                        subtitles. The language can be prefixed with a "-" to
 259                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 260     keepvideo:         Keep the video file after post-processing
 261     daterange:         A DateRange object, download only if the upload_date is in the range.
 262     skip_download:     Skip the actual download of the video file
 263     cachedir:          Location of the cache files in the filesystem.
 264                        False to disable filesystem cache.
 265     noplaylist:        Download single video instead of a playlist if in doubt.
 266     age_limit:         An integer representing the user's age in years.
 267                        Unsuitable videos for the given age are skipped.
 268     min_views:         An integer representing the minimum view count the video
 269                        must have in order to not be skipped.
 270                        Videos without view count information are always
 271                        downloaded. None for no limit.
 272     max_views:         An integer representing the maximum view count.
 273                        Videos that are more popular than that are not
 274                        downloaded.
 275                        Videos without view count information are always
 276                        downloaded. None for no limit.
 277     download_archive:  File name of a file where all downloads are recorded.
 278                        Videos already present in the file are not downloaded
 279                        again.
 280     break_on_existing: Stop the download process after attempting to download a
 281                        file that is in the archive.
 282     break_on_reject:   Stop the download process when encountering a video that
 283                        has been filtered out.
 284     cookiefile:        File name where cookies should be read from and dumped to
 285     nocheckcertificate:Do not verify SSL certificates
 286     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 287                        At the moment, this is only supported by YouTube.
 288     proxy:             URL of the proxy server to use
 289     geo_verification_proxy:  URL of the proxy to use for IP address verification
 290                        on geo-restricted sites.
 291     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 292     bidi_workaround:   Work around buggy terminals without bidirectional text
  293                        support, using fribidi
 294     debug_printtraffic:Print out sent and received HTTP traffic
 295     include_ads:       Download ads as well
 296     default_search:    Prepend this string if an input url is not valid.
 297                        'auto' for elaborate guessing
 298     encoding:          Use this encoding instead of the system-specified.
 299     extract_flat:      Do not resolve URLs, return the immediate result.
 300                        Pass in 'in_playlist' to only show this behavior for
 301                        playlist items.
 302     postprocessors:    A list of dictionaries, each with an entry
 303                        * key:  The name of the postprocessor. See
 304                                yt_dlp/postprocessor/__init__.py for a list.
 305                        * when: When to run the postprocessor. Can be one of
 306                                pre_process|before_dl|post_process|after_move.
 307                                Assumed to be 'post_process' if not given
 308     post_hooks:        A list of functions that get called as the final step
 309                        for each video file, after all postprocessors have been
 310                        called. The filename will be passed as the only argument.
 311     progress_hooks:    A list of functions that get called on download
 312                        progress, with a dictionary with the entries
 313                        * status: One of "downloading", "error", or "finished".
 314                                  Check this first and ignore unknown values.
 315
 316                        If status is one of "downloading", or "finished", the
 317                        following properties may also be present:
 318                        * filename: The final filename (always present)
 319                        * tmpfilename: The filename we're currently writing to
 320                        * downloaded_bytes: Bytes on disk
 321                        * total_bytes: Size of the whole file, None if unknown
 322                        * total_bytes_estimate: Guess of the eventual file size,
 323                                                None if unavailable.
 324                        * elapsed: The number of seconds since download started.
 325                        * eta: The estimated time in seconds, None if unknown
 326                        * speed: The download speed in bytes/second, None if
 327                                 unknown
 328                        * fragment_index: The counter of the currently
 329                                          downloaded video fragment.
 330                        * fragment_count: The number of fragments (= individual
 331                                          files that will be merged)
 332
 333                        Progress hooks are guaranteed to be called at least once
 334                        (with status "finished") if the download is successful.
 335     merge_output_format: Extension to use when merging formats.
 336     final_ext:         Expected final extension; used to detect when the file was
 337                        already downloaded and converted. "merge_output_format" is
 338                        replaced by this extension when given
 339     fixup:             Automatically correct known faults of the file.
 340                        One of:
 341                        - "never": do nothing
 342                        - "warn": only emit a warning
 343                        - "detect_or_warn": check whether we can do anything
 344                                            about it, warn otherwise (default)
 345     source_address:    Client-side IP address to bind to.
 346     call_home:         Boolean, true iff we are allowed to contact the
 347                        yt-dlp servers for debugging. (BROKEN)
 348     sleep_interval_requests: Number of seconds to sleep between requests
 349                        during extraction
 350     sleep_interval:    Number of seconds to sleep before each download when
 351                        used alone or a lower bound of a range for randomized
 352                        sleep before each download (minimum possible number
 353                        of seconds to sleep) when used along with
 354                        max_sleep_interval.
 355     max_sleep_interval:Upper bound of a range for randomized sleep before each
 356                        download (maximum possible number of seconds to sleep).
 357                        Must only be used along with sleep_interval.
 358                        Actual sleep time will be a random float from range
 359                        [sleep_interval; max_sleep_interval].
 360     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 361     listformats:       Print an overview of available video formats and exit.
 362     list_thumbnails:   Print a table of all thumbnails and exit.
 363     match_filter:      A function that gets called with the info_dict of
 364                        every video.
 365                        If it returns a message, the video is ignored.
 366                        If it returns None, the video is downloaded.
 367                        match_filter_func in utils.py is one example for this.
 368     no_color:          Do not emit color codes in output.
 369     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 370                        HTTP header
 371     geo_bypass_country:
  372                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 373                        explicit geographic restriction bypassing via faking
 374                        X-Forwarded-For HTTP header
 375     geo_bypass_ip_block:
 376                        IP range in CIDR notation that will be used similarly to
 377                        geo_bypass_country
 378
 379     The following options determine which downloader is picked:
 380     external_downloader: A dictionary of protocol keys and the executable of the
 381                        external downloader to use for it. The allowed protocols
 382                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 383                        Set the value to 'native' to use the native downloader
 384     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 385                        or {'m3u8': 'ffmpeg'} instead.
 386                        Use the native HLS downloader instead of ffmpeg/avconv
 387                        if True, otherwise use ffmpeg/avconv if False, otherwise
 388                        use downloader suggested by extractor if None.
 389     compat_opts:       Compatibility options. See "Differences in default behavior".
 390                        Note that only format-sort, format-spec, no-live-chat, no-attach-info-json
 391                        playlist-index, list-formats, no-youtube-channel-redirect
 392                        and no-youtube-unavailable-videos works when used via the API
 393
 394     The following parameters are not used by YoutubeDL itself, they are used by
 395     the downloader (see yt_dlp/downloader/common.py):
 396     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 397     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 398     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 399     http_chunk_size.
 400
 401     The following options are used by the post processors:
 402     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 403                        otherwise prefer ffmpeg. (avconv support is deprecated)
 404     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 405                        to the binary or its containing directory.
 406     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 407                         and a list of additional command-line arguments for the
 408                         postprocessor/executable. The dict can also have "PP+EXE" keys
 409                         which are used when the given exe is used by the given PP.
  410                         Use 'default' as the name for arguments to be passed to all PP
 411
 412     The following options are used by the extractors:
 413     extractor_retries: Number of times to retry for known errors
 414     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 415     hls_split_discontinuity: Split HLS playlists to different formats at
 416                        discontinuities such as ad breaks (default: False)
 417     youtube_include_dash_manifest: If True (default), DASH manifests and related
 418                        data will be downloaded and processed by extractor.
 419                        You can reduce network I/O by disabling it if you don't
 420                        care about DASH. (only for youtube)
 421     youtube_include_hls_manifest: If True (default), HLS manifests and related
 422                        data will be downloaded and processed by extractor.
 423                        You can reduce network I/O by disabling it if you don't
 424                        care about HLS. (only for youtube)
 425     """
 426
    # Names of the fields that are grouped as numeric.
    # NOTE(review): presumably these are info-dict fields that get
    # number-aware handling elsewhere in the class -- confirm at the use site.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults. __init__ assigns instance-level values for several
    # of them (e.g. params, _ies, _pps). The list/dict/set objects below are
    # created once with the class, so they are shared by every instance that
    # does not rebind the attribute.
    params = None
    _ies = []
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    # Double underscore: the name is mangled to _YoutubeDL__prepare_filename_warned
    __prepare_filename_warned = False
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
 448
 449     def __init__(self, params=None, auto_init=True):
 450         """Create a FileDownloader object with the given options."""
 451         if params is None:
 452             params = {}
 453         self._ies = []
 454         self._ies_instances = {}
 455         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 456         self.__prepare_filename_warned = False
 457         self._first_webpage_request = True
 458         self._post_hooks = []
 459         self._progress_hooks = []
 460         self._download_retcode = 0
 461         self._num_downloads = 0
 462         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 463         self._err_file = sys.stderr
 464         self.params = {
 465             # Default parameters
 466             'nocheckcertificate': False,
 467         }
 468         self.params.update(params)
 469         self.cache = Cache(self)
 470
 471         if sys.version_info < (3, 6):
 472             self.report_warning(
 473                 'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
 474                 'Update to Python 3.6 or above' % sys.version_info[:2])
 475
 476         def check_deprecated(param, option, suggestion):
 477             if self.params.get(param) is not None:
 478                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 479                 return True
 480             return False
 481
 482         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 483             if self.params.get('geo_verification_proxy') is None:
 484                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 485
 486         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 487         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 488         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 489
 490         for msg in self.params.get('warnings', []):
 491             self.report_warning(msg)
 492
 493         if self.params.get('final_ext'):
 494             if self.params.get('merge_output_format'):
 495                 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
 496             self.params['merge_output_format'] = self.params['final_ext']
 497
 498         if 'overwrites' in self.params and self.params['overwrites'] is None:
 499             del self.params['overwrites']
 500
 501         if params.get('bidi_workaround', False):
 502             try:
 503                 import pty
 504                 master, slave = pty.openpty()
 505                 width = compat_get_terminal_size().columns
 506                 if width is None:
 507                     width_args = []
 508                 else:
 509                     width_args = ['-w', str(width)]
 510                 sp_kwargs = dict(
 511                     stdin=subprocess.PIPE,
 512                     stdout=slave,
 513                     stderr=self._err_file)
 514                 try:
 515                     self._output_process = subprocess.Popen(
 516                         ['bidiv'] + width_args, **sp_kwargs
 517                     )
 518                 except OSError:
 519                     self._output_process = subprocess.Popen(
 520                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 521                 self._output_channel = os.fdopen(master, 'rb')
 522             except OSError as ose:
 523                 if ose.errno == errno.ENOENT:
 524                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 525                 else:
 526                     raise
 527
 528         if (sys.platform != 'win32'
 529                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 530                 and not params.get('restrictfilenames', False)):
 531             # Unicode filesystem API will throw errors (#1474, #13027)
 532             self.report_warning(
 533                 'Assuming --restrict-filenames since file system encoding '
 534                 'cannot encode all characters. '
 535                 'Set the LC_ALL environment variable to fix this.')
 536             self.params['restrictfilenames'] = True
 537
 538         self.outtmpl_dict = self.parse_outtmpl()
 539
 540         self._setup_opener()
 541
 542         """Preload the archive, if any is specified"""
 543         def preload_download_archive(fn):
 544             if fn is None:
 545                 return False
 546             self.write_debug('Loading archive file %r\n' % fn)
 547             try:
 548                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 549                     for line in archive_file:
 550                         self.archive.add(line.strip())
 551             except IOError as ioe:
 552                 if ioe.errno != errno.ENOENT:
 553                     raise
 554                 return False
 555             return True
 556
 557         self.archive = set()
 558         preload_download_archive(self.params.get('download_archive'))
 559
 560         if auto_init:
 561             self.print_debug_header()
 562             self.add_default_info_extractors()
 563
 564         for pp_def_raw in self.params.get('postprocessors', []):
 565             pp_class = get_postprocessor(pp_def_raw['key'])
 566             pp_def = dict(pp_def_raw)
 567             del pp_def['key']
 568             if 'when' in pp_def:
 569                 when = pp_def['when']
 570                 del pp_def['when']
 571             else:
 572                 when = 'post_process'
 573             pp = pp_class(self, **compat_kwargs(pp_def))
 574             self.add_post_processor(pp, when=when)
 575
 576         for ph in self.params.get('post_hooks', []):
 577             self.add_post_hook(ph)
 578
 579         for ph in self.params.get('progress_hooks', []):
 580             self.add_progress_hook(ph)
 581
 582         register_socks_protocols()
 583
 584     def warn_if_short_id(self, argv):
 585         # short YouTube ID starting with dash?
 586         idxs = [
 587             i for i, a in enumerate(argv)
 588             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 589         if idxs:
 590             correct_argv = (
 591                 ['yt-dlp']
 592                 + [a for i, a in enumerate(argv) if i not in idxs]
 593                 + ['--'] + [argv[i] for i in idxs]
 594             )
 595             self.report_warning(
 596                 'Long argument string detected. '
 597                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 598                 args_to_str(correct_argv))
 599
 600     def add_info_extractor(self, ie):
 601         """Add an InfoExtractor object to the end of the list."""
 602         self._ies.append(ie)
 603         if not isinstance(ie, type):
 604             self._ies_instances[ie.ie_key()] = ie
 605             ie.set_downloader(self)
 606
 607     def get_info_extractor(self, ie_key):
 608         """
 609         Get an instance of an IE with name ie_key, it will try to get one from
 610         the _ies list, if there's no instance it will create a new one and add
 611         it to the extractor list.
 612         """
 613         ie = self._ies_instances.get(ie_key)
 614         if ie is None:
 615             ie = get_info_extractor(ie_key)()
 616             self.add_info_extractor(ie)
 617         return ie
 618
 619     def add_default_info_extractors(self):
 620         """
 621         Add the InfoExtractors returned by gen_extractors to the end of the list
 622         """
 623         for ie in gen_extractor_classes():
 624             self.add_info_extractor(ie)
 625
 626     def add_post_processor(self, pp, when='post_process'):
 627         """Add a PostProcessor object to the end of the chain."""
 628         self._pps[when].append(pp)
 629         pp.set_downloader(self)
 630
 631     def add_post_hook(self, ph):
 632         """Add the post hook"""
 633         self._post_hooks.append(ph)
 634
 635     def add_progress_hook(self, ph):
 636         """Add the progress hook (currently only for the file downloader)"""
 637         self._progress_hooks.append(ph)
 638
 639     def _bidi_workaround(self, message):
 640         if not hasattr(self, '_output_channel'):
 641             return message
 642
 643         assert hasattr(self, '_output_process')
 644         assert isinstance(message, compat_str)
 645         line_count = message.count('\n') + 1
 646         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 647         self._output_process.stdin.flush()
 648         res = ''.join(self._output_channel.readline().decode('utf-8')
 649                       for _ in range(line_count))
 650         return res[:-len('\n')]
 651
 652     def _write_string(self, s, out=None):
 653         write_string(s, out=out, encoding=self.params.get('encoding'))
 654
 655     def to_stdout(self, message, skip_eol=False, quiet=False):
 656         """Print message to stdout"""
 657         if self.params.get('logger'):
 658             self.params['logger'].debug(message)
 659         elif not quiet:
 660             message = self._bidi_workaround(message)
 661             terminator = ['\n', ''][skip_eol]
 662             output = message + terminator
 663
 664             self._write_string(output, self._screen_file)
 665
 666     def to_stderr(self, message):
 667         """Print message to stderr"""
 668         assert isinstance(message, compat_str)
 669         if self.params.get('logger'):
 670             self.params['logger'].error(message)
 671         else:
 672             message = self._bidi_workaround(message)
 673             output = message + '\n'
 674             self._write_string(output, self._err_file)
 675
 676     def to_console_title(self, message):
 677         if not self.params.get('consoletitle', False):
 678             return
 679         if compat_os_name == 'nt':
 680             if ctypes.windll.kernel32.GetConsoleWindow():
 681                 # c_wchar_p() might not be necessary if `message` is
 682                 # already of type unicode()
 683                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 684         elif 'TERM' in os.environ:
 685             self._write_string('\033]0;%s\007' % message, self._screen_file)
 686
 687     def save_console_title(self):
 688         if not self.params.get('consoletitle', False):
 689             return
 690         if self.params.get('simulate', False):
 691             return
 692         if compat_os_name != 'nt' and 'TERM' in os.environ:
 693             # Save the title on stack
 694             self._write_string('\033[22;0t', self._screen_file)
 695
 696     def restore_console_title(self):
 697         if not self.params.get('consoletitle', False):
 698             return
 699         if self.params.get('simulate', False):
 700             return
 701         if compat_os_name != 'nt' and 'TERM' in os.environ:
 702             # Restore the title from stack
 703             self._write_string('\033[23;0t', self._screen_file)
 704
    def __enter__(self):
        """Enter the context manager: save the console title and return self."""
        self.save_console_title()
        return self
 708
 709     def __exit__(self, *args):
 710         self.restore_console_title()
 711
 712         if self.params.get('cookiefile') is not None:
 713             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 714
 715     def trouble(self, message=None, tb=None):
 716         """Determine action to take when a download problem appears.
 717
 718         Depending on if the downloader has been configured to ignore
 719         download errors or not, this method may throw an exception or
 720         not when errors are found, after printing the message.
 721
 722         tb, if given, is additional traceback information.
 723         """
 724         if message is not None:
 725             self.to_stderr(message)
 726         if self.params.get('verbose'):
 727             if tb is None:
 728                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 729                     tb = ''
 730                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 731                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 732                     tb += encode_compat_str(traceback.format_exc())
 733                 else:
 734                     tb_data = traceback.format_list(traceback.extract_stack())
 735                     tb = ''.join(tb_data)
 736             self.to_stderr(tb)
 737         if not self.params.get('ignoreerrors', False):
 738             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 739                 exc_info = sys.exc_info()[1].exc_info
 740             else:
 741                 exc_info = sys.exc_info()
 742             raise DownloadError(message, exc_info)
 743         self._download_retcode = 1
 744
 745     def to_screen(self, message, skip_eol=False):
 746         """Print message to stdout if not in quiet mode"""
 747         self.to_stdout(
 748             message, skip_eol, quiet=self.params.get('quiet', False))
 749
 750     def report_warning(self, message):
 751         '''
 752         Print the message to stderr, it will be prefixed with 'WARNING:'
 753         If stderr is a tty file the 'WARNING:' will be colored
 754         '''
 755         if self.params.get('logger') is not None:
 756             self.params['logger'].warning(message)
 757         else:
 758             if self.params.get('no_warnings'):
 759                 return
 760             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 761                 _msg_header = '\033[0;33mWARNING:\033[0m'
 762             else:
 763                 _msg_header = 'WARNING:'
 764             warning_message = '%s %s' % (_msg_header, message)
 765             self.to_stderr(warning_message)
 766
 767     def report_error(self, message, tb=None):
 768         '''
 769         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 770         in red if stderr is a tty file.
 771         '''
 772         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 773             _msg_header = '\033[0;31mERROR:\033[0m'
 774         else:
 775             _msg_header = 'ERROR:'
 776         error_message = '%s %s' % (_msg_header, message)
 777         self.trouble(error_message, tb)
 778
 779     def write_debug(self, message):
 780         '''Log debug message or Print message to stderr'''
 781         if not self.params.get('verbose', False):
 782             return
 783         message = '[debug] %s' % message
 784         if self.params.get('logger'):
 785             self.params['logger'].debug(message)
 786         else:
 787             self._write_string('%s\n' % message)
 788
 789     def report_file_already_downloaded(self, file_name):
 790         """Report file has already been fully downloaded."""
 791         try:
 792             self.to_screen('[download] %s has already been downloaded' % file_name)
 793         except UnicodeEncodeError:
 794             self.to_screen('[download] The file has already been downloaded')
 795
 796     def report_file_delete(self, file_name):
 797         """Report that existing file will be deleted."""
 798         try:
 799             self.to_screen('Deleting existing file %s' % file_name)
 800         except UnicodeEncodeError:
 801             self.to_screen('Deleting existing file')
 802
 803     def parse_outtmpl(self):
 804         outtmpl_dict = self.params.get('outtmpl', {})
 805         if not isinstance(outtmpl_dict, dict):
 806             outtmpl_dict = {'default': outtmpl_dict}
 807         outtmpl_dict.update({
 808             k: v for k, v in DEFAULT_OUTTMPL.items()
 809             if not outtmpl_dict.get(k)})
 810         for key, val in outtmpl_dict.items():
 811             if isinstance(val, bytes):
 812                 self.report_warning(
 813                     'Parameter outtmpl is bytes, but should be a unicode string. '
 814                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 815         return outtmpl_dict
 816
 817     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 818         """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
 819         template_dict = dict(info_dict)
 820         na = self.params.get('outtmpl_na_placeholder', 'NA')
 821
 822         # duration_string
 823         template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 824             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
 825             if info_dict.get('duration', None) is not None
 826             else None)
 827
 828         # epoch
 829         template_dict['epoch'] = int(time.time())
 830
 831         # autonumber
 832         autonumber_size = self.params.get('autonumber_size')
 833         if autonumber_size is None:
 834             autonumber_size = 5
 835         template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 836
 837         # resolution if not defined
 838         if template_dict.get('resolution') is None:
 839             if template_dict.get('width') and template_dict.get('height'):
 840                 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 841             elif template_dict.get('height'):
 842                 template_dict['resolution'] = '%sp' % template_dict['height']
 843             elif template_dict.get('width'):
 844                 template_dict['resolution'] = '%dx?' % template_dict['width']
 845
 846         # For fields playlist_index and autonumber convert all occurrences
 847         # of %(field)s to %(field)0Nd for backward compatibility
 848         field_size_compat_map = {
 849             'playlist_index': len(str(template_dict.get('_last_playlist_index') or '')),
 850             'autonumber': autonumber_size,
 851         }
 852         FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 853         mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 854         if mobj:
 855             outtmpl = re.sub(
 856                 FIELD_SIZE_COMPAT_RE,
 857                 r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 858                 outtmpl)
 859
 860         numeric_fields = list(self._NUMERIC_FIELDS)
 861         if sanitize is None:
 862             sanitize = lambda k, v: v
 863
 864         EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P<key>[^)]*)')
 865         # Field is of the form key1.key2...
 866         # where keys (except first) can be string, int or slice
 867         FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
 868         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
 869             (?P<negate>-)?
 870             (?P<fields>{0})
 871             (?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
 872             (?:>(?P<strf_format>.+?))?
 873             (?:\|(?P<default>.*?))?
 874             $'''.format(FIELD_RE))
 875         MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
 876         MATH_FUNCTIONS = {
 877             '+': float.__add__,
 878             '-': float.__sub__,
 879         }
 880         for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl):
 881             final_key = outer_mobj.group('key')
 882             str_type = outer_mobj.group('type')
 883             value = None
 884             mobj = re.match(INTERNAL_FORMAT_RE, final_key)
 885             if mobj is not None:
 886                 mobj = mobj.groupdict()
 887                 # Object traversal
 888                 fields = mobj['fields'].split('.')
 889                 value = traverse_dict(template_dict, fields)
 890                 # Negative
 891                 if mobj['negate']:
 892                     value = float_or_none(value)
 893                     if value is not None:
 894                         value *= -1
 895                 # Do maths
 896                 if mobj['maths']:
 897                     value = float_or_none(value)
 898                     operator = None
 899                     for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]:
 900                         if item == '':
 901                             value = None
 902                         if value is None:
 903                             break
 904                         if operator:
 905                             item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
 906                             offset = float_or_none(item)
 907                             if offset is None:
 908                                 offset = float_or_none(traverse_dict(template_dict, item.split('.')))
 909                             try:
 910                                 value = operator(value, multiplier * offset)
 911                             except (TypeError, ZeroDivisionError):
 912                                 value = None
 913                             operator = None
 914                         else:
 915                             operator = MATH_FUNCTIONS[item]
 916                 # Datetime formatting
 917                 if mobj['strf_format']:
 918                     value = strftime_or_none(value, mobj['strf_format'])
 919                 # Set default
 920                 if value is None and mobj['default'] is not None:
 921                     value = mobj['default']
 922             # Sanitize
 923             if str_type in 'crs' and value is not None:  # string
 924                 value = sanitize('%{}'.format(str_type) % fields[-1], value)
 925             else:  # numeric
 926                 numeric_fields.append(final_key)
 927                 value = float_or_none(value)
 928             if value is not None:
 929                 template_dict[final_key] = value
 930
 931         # Missing numeric fields used together with integer presentation types
 932         # in format specification will break the argument substitution since
 933         # string NA placeholder is returned for missing fields. We will patch
 934         # output template for missing fields to meet string presentation type.
 935         for numeric_field in numeric_fields:
 936             if template_dict.get(numeric_field) is None:
 937                 outtmpl = re.sub(
 938                     FORMAT_RE.format(re.escape(numeric_field)),
 939                     r'%({0})s'.format(numeric_field), outtmpl)
 940
 941         template_dict = collections.defaultdict(lambda: na, (
 942             (k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 943             for k, v in template_dict.items() if v is not None))
 944         return outtmpl, template_dict
 945
 946     def _prepare_filename(self, info_dict, tmpl_type='default'):
 947         try:
 948             sanitize = lambda k, v: sanitize_filename(
 949                 compat_str(v),
 950                 restricted=self.params.get('restrictfilenames'),
 951                 is_id=(k == 'id' or k.endswith('_id')))
 952             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
 953             outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
 954
 955             # expand_path translates '%%' into '%' and '$$' into '$'
 956             # correspondingly that is not what we want since we need to keep
 957             # '%%' intact for template dict substitution step. Working around
 958             # with boundary-alike separator hack.
 959             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 960             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 961
 962             # outtmpl should be expand_path'ed before template dict substitution
 963             # because meta fields may contain env variables we don't want to
 964             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 965             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 966             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 967
 968             force_ext = OUTTMPL_TYPES.get(tmpl_type)
 969             if force_ext is not None:
 970                 filename = replace_extension(filename, force_ext, template_dict.get('ext'))
 971
 972             # https://github.com/blackjack4494/youtube-dlc/issues/85
 973             trim_file_name = self.params.get('trim_file_name', False)
 974             if trim_file_name:
 975                 fn_groups = filename.rsplit('.')
 976                 ext = fn_groups[-1]
 977                 sub_ext = ''
 978                 if len(fn_groups) > 2:
 979                     sub_ext = fn_groups[-2]
 980                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 981
 982             return filename
 983         except ValueError as err:
 984             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 985             return None
 986
 987     def prepare_filename(self, info_dict, dir_type='', warn=False):
 988         """Generate the output filename."""
 989         paths = self.params.get('paths', {})
 990         assert isinstance(paths, dict)
 991         filename = self._prepare_filename(info_dict, dir_type or 'default')
 992
 993         if warn and not self.__prepare_filename_warned:
 994             if not paths:
 995                 pass
 996             elif filename == '-':
 997                 self.report_warning('--paths is ignored when an outputting to stdout')
 998             elif os.path.isabs(filename):
 999                 self.report_warning('--paths is ignored since an absolute path is given in output template')
1000             self.__prepare_filename_warned = True
1001         if filename == '-' or not filename:
1002             return filename
1003
1004         homepath = expand_path(paths.get('home', '').strip())
1005         assert isinstance(homepath, compat_str)
1006         subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
1007         assert isinstance(subdir, compat_str)
1008         path = os.path.join(homepath, subdir, filename)
1009
1010         # Temporary fix for #4787
1011         # 'Treat' all problem characters by passing filename through preferredencoding
1012         # to workaround encoding issues with subprocess on python2 @ Windows
1013         if sys.version_info < (3, 0) and sys.platform == 'win32':
1014             path = encodeFilename(path, True).decode(preferredencoding())
1015         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1016
1017     def _match_entry(self, info_dict, incomplete):
1018         """ Returns None if the file should be downloaded """
1019
1020         def check_filter():
1021             video_title = info_dict.get('title', info_dict.get('id', 'video'))
1022             if 'title' in info_dict:
1023                 # This can happen when we're just evaluating the playlist
1024                 title = info_dict['title']
1025                 matchtitle = self.params.get('matchtitle', False)
1026                 if matchtitle:
1027                     if not re.search(matchtitle, title, re.IGNORECASE):
1028                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1029                 rejecttitle = self.params.get('rejecttitle', False)
1030                 if rejecttitle:
1031                     if re.search(rejecttitle, title, re.IGNORECASE):
1032                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1033             date = info_dict.get('upload_date')
1034             if date is not None:
1035                 dateRange = self.params.get('daterange', DateRange())
1036                 if date not in dateRange:
1037                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1038             view_count = info_dict.get('view_count')
1039             if view_count is not None:
1040                 min_views = self.params.get('min_views')
1041                 if min_views is not None and view_count < min_views:
1042                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1043                 max_views = self.params.get('max_views')
1044                 if max_views is not None and view_count > max_views:
1045                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1046             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1047                 return 'Skipping "%s" because it is age restricted' % video_title
1048             if self.in_download_archive(info_dict):
1049                 return '%s has already been recorded in archive' % video_title
1050
1051             if not incomplete:
1052                 match_filter = self.params.get('match_filter')
1053                 if match_filter is not None:
1054                     ret = match_filter(info_dict)
1055                     if ret is not None:
1056                         return ret
1057             return None
1058
1059         reason = check_filter()
1060         if reason is not None:
1061             self.to_screen('[download] ' + reason)
1062             if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
1063                 raise ExistingVideoReached()
1064             elif self.params.get('break_on_reject', False):
1065                 raise RejectedVideoReached()
1066         return reason
1067
1068     @staticmethod
1069     def add_extra_info(info_dict, extra_info):
1070         '''Set the keys from extra_info in info dict if they are missing'''
1071         for key, value in extra_info.items():
1072             info_dict.setdefault(key, value)
1073
1074     def extract_info(self, url, download=True, ie_key=None, extra_info={},
1075                      process=True, force_generic_extractor=False):
1076         """
1077         Return a list with a dictionary for each video extracted.
1078
1079         Arguments:
1080         url -- URL to extract
1081
1082         Keyword arguments:
1083         download -- whether to download videos during extraction
1084         ie_key -- extractor key hint
1085         extra_info -- dictionary containing the extra values to add to each result
1086         process -- whether to resolve all unresolved references (URLs, playlist items),
1087             must be True for download to work.
1088         force_generic_extractor -- force using the generic extractor
1089         """
1090
1091         if not ie_key and force_generic_extractor:
1092             ie_key = 'Generic'
1093
1094         if ie_key:
1095             ies = [self.get_info_extractor(ie_key)]
1096         else:
1097             ies = self._ies
1098
1099         for ie in ies:
1100             if not ie.suitable(url):
1101                 continue
1102
1103             ie_key = ie.ie_key()
1104             ie = self.get_info_extractor(ie_key)
1105             if not ie.working():
1106                 self.report_warning('The program functionality for this site has been marked as broken, '
1107                                     'and will probably not work.')
1108
1109             try:
1110                 temp_id = str_or_none(
1111                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1112                     else ie._match_id(url))
1113             except (AssertionError, IndexError, AttributeError):
1114                 temp_id = None
1115             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1116                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1117                                ie_key, temp_id))
1118                 break
1119             return self.__extract_info(url, ie, download, extra_info, process)
1120         else:
1121             self.report_error('no suitable InfoExtractor for URL %s' % url)
1122
1123     def __handle_extraction_exceptions(func):
1124         def wrapper(self, *args, **kwargs):
1125             try:
1126                 return func(self, *args, **kwargs)
1127             except GeoRestrictedError as e:
1128                 msg = e.msg
1129                 if e.countries:
1130                     msg += '\nThis video is available in %s.' % ', '.join(
1131                         map(ISO3166Utils.short2full, e.countries))
1132                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1133                 self.report_error(msg)
1134             except ExtractorError as e:  # An error we somewhat expected
1135                 self.report_error(compat_str(e), e.format_traceback())
1136             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1137                 raise
1138             except Exception as e:
1139                 if self.params.get('ignoreerrors', False):
1140                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1141                 else:
1142                     raise
1143         return wrapper
1144
1145     @__handle_extraction_exceptions
1146     def __extract_info(self, url, ie, download, extra_info, process):
1147         ie_result = ie.extract(url)
1148         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1149             return
1150         if isinstance(ie_result, list):
1151             # Backwards compatibility: old IE result format
1152             ie_result = {
1153                 '_type': 'compat_list',
1154                 'entries': ie_result,
1155             }
1156         self.add_default_extra_info(ie_result, ie, url)
1157         if process:
1158             return self.process_ie_result(ie_result, download, extra_info)
1159         else:
1160             return ie_result
1161
1162     def add_default_extra_info(self, ie_result, ie, url):
1163         self.add_extra_info(ie_result, {
1164             'extractor': ie.IE_NAME,
1165             'webpage_url': url,
1166             'webpage_url_basename': url_basename(url),
1167             'extractor_key': ie.ie_key(),
1168         })
1169
1170     def process_ie_result(self, ie_result, download=True, extra_info={}):
1171         """
1172         Take the result of the ie(may be modified) and resolve all unresolved
1173         references (URLs, playlist items).
1174
1175         It will also download the videos if 'download'.
1176         Returns the resolved ie_result.
1177         """
1178         result_type = ie_result.get('_type', 'video')
1179
1180         if result_type in ('url', 'url_transparent'):
1181             ie_result['url'] = sanitize_url(ie_result['url'])
1182             extract_flat = self.params.get('extract_flat', False)
1183             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1184                     or extract_flat is True):
1185                 self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
1186                 return ie_result
1187
1188         if result_type == 'video':
1189             self.add_extra_info(ie_result, extra_info)
1190             return self.process_video_result(ie_result, download=download)
1191         elif result_type == 'url':
1192             # We have to add extra_info to the results because it may be
1193             # contained in a playlist
1194             return self.extract_info(
1195                 ie_result['url'], download,
1196                 ie_key=ie_result.get('ie_key'),
1197                 extra_info=extra_info)
1198         elif result_type == 'url_transparent':
1199             # Use the information from the embedding page
1200             info = self.extract_info(
1201                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1202                 extra_info=extra_info, download=False, process=False)
1203
1204             # extract_info may return None when ignoreerrors is enabled and
1205             # extraction failed with an error, don't crash and return early
1206             # in this case
1207             if not info:
1208                 return info
1209
1210             force_properties = dict(
1211                 (k, v) for k, v in ie_result.items() if v is not None)
1212             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1213                 if f in force_properties:
1214                     del force_properties[f]
1215             new_result = info.copy()
1216             new_result.update(force_properties)
1217
1218             # Extracted info may not be a video result (i.e.
1219             # info.get('_type', 'video') != video) but rather an url or
1220             # url_transparent. In such cases outer metadata (from ie_result)
1221             # should be propagated to inner one (info). For this to happen
1222             # _type of info should be overridden with url_transparent. This
1223             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1224             if new_result.get('_type') == 'url':
1225                 new_result['_type'] = 'url_transparent'
1226
1227             return self.process_ie_result(
1228                 new_result, download=download, extra_info=extra_info)
1229         elif result_type in ('playlist', 'multi_video'):
1230             # Protect from infinite recursion due to recursively nested playlists
1231             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1232             webpage_url = ie_result['webpage_url']
1233             if webpage_url in self._playlist_urls:
1234                 self.to_screen(
1235                     '[download] Skipping already downloaded playlist: %s'
1236                     % ie_result.get('title') or ie_result.get('id'))
1237                 return
1238
1239             self._playlist_level += 1
1240             self._playlist_urls.add(webpage_url)
1241             try:
1242                 return self.__process_playlist(ie_result, download)
1243             finally:
1244                 self._playlist_level -= 1
1245                 if not self._playlist_level:
1246                     self._playlist_urls.clear()
1247         elif result_type == 'compat_list':
1248             self.report_warning(
1249                 'Extractor %s returned a compat_list result. '
1250                 'It needs to be updated.' % ie_result.get('extractor'))
1251
1252             def _fixup(r):
1253                 self.add_extra_info(
1254                     r,
1255                     {
1256                         'extractor': ie_result['extractor'],
1257                         'webpage_url': ie_result['webpage_url'],
1258                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1259                         'extractor_key': ie_result['extractor_key'],
1260                     }
1261                 )
1262                 return r
1263             ie_result['entries'] = [
1264                 self.process_ie_result(_fixup(r), download, extra_info)
1265                 for r in ie_result['entries']
1266             ]
1267             return ie_result
1268         else:
1269             raise Exception('Invalid result type: %s' % result_type)
1270
1271     def _ensure_dir_exists(self, path):
1272         return make_dir(path, self.report_error)
1273
1274     def __process_playlist(self, ie_result, download):
1275         # We process each entry in the playlist
1276         playlist = ie_result.get('title') or ie_result.get('id')
1277         self.to_screen('[download] Downloading playlist: %s' % playlist)
1278
1279         if 'entries' not in ie_result:
1280             raise EntryNotInPlaylist()
1281         incomplete_entries = bool(ie_result.get('requested_entries'))
1282         if incomplete_entries:
1283             def fill_missing_entries(entries, indexes):
1284                 ret = [None] * max(*indexes)
1285                 for i, entry in zip(indexes, entries):
1286                     ret[i - 1] = entry
1287                 return ret
1288             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1289
1290         playlist_results = []
1291
1292         playliststart = self.params.get('playliststart', 1) - 1
1293         playlistend = self.params.get('playlistend')
1294         # For backwards compatibility, interpret -1 as whole list
1295         if playlistend == -1:
1296             playlistend = None
1297
1298         playlistitems_str = self.params.get('playlist_items')
1299         playlistitems = None
1300         if playlistitems_str is not None:
1301             def iter_playlistitems(format):
1302                 for string_segment in format.split(','):
1303                     if '-' in string_segment:
1304                         start, end = string_segment.split('-')
1305                         for item in range(int(start), int(end) + 1):
1306                             yield int(item)
1307                     else:
1308                         yield int(string_segment)
1309             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1310
1311         ie_entries = ie_result['entries']
1312
1313         def make_playlistitems_entries(list_ie_entries):
1314             num_entries = len(list_ie_entries)
1315             for i in playlistitems:
1316                 if -num_entries < i <= num_entries:
1317                     yield list_ie_entries[i - 1]
1318                 elif incomplete_entries:
1319                     raise EntryNotInPlaylist()
1320
1321         if isinstance(ie_entries, list):
1322             n_all_entries = len(ie_entries)
1323             if playlistitems:
1324                 entries = list(make_playlistitems_entries(ie_entries))
1325             else:
1326                 entries = ie_entries[playliststart:playlistend]
1327             n_entries = len(entries)
1328             msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
1329         elif isinstance(ie_entries, PagedList):
1330             if playlistitems:
1331                 entries = []
1332                 for item in playlistitems:
1333                     entries.extend(ie_entries.getslice(
1334                         item - 1, item
1335                     ))
1336             else:
1337                 entries = ie_entries.getslice(
1338                     playliststart, playlistend)
1339             n_entries = len(entries)
1340             msg = 'Downloading %d videos' % n_entries
1341         else:  # iterable
1342             if playlistitems:
1343                 entries = list(make_playlistitems_entries(list(itertools.islice(
1344                     ie_entries, 0, max(playlistitems)))))
1345             else:
1346                 entries = list(itertools.islice(
1347                     ie_entries, playliststart, playlistend))
1348             n_entries = len(entries)
1349             msg = 'Downloading %d videos' % n_entries
1350
1351         if any((entry is None for entry in entries)):
1352             raise EntryNotInPlaylist()
1353         if not playlistitems and (playliststart or playlistend):
1354             playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
1355         ie_result['entries'] = entries
1356         ie_result['requested_entries'] = playlistitems
1357
1358         if self.params.get('allow_playlist_files', True):
1359             ie_copy = {
1360                 'playlist': playlist,
1361                 'playlist_id': ie_result.get('id'),
1362                 'playlist_title': ie_result.get('title'),
1363                 'playlist_uploader': ie_result.get('uploader'),
1364                 'playlist_uploader_id': ie_result.get('uploader_id'),
1365                 'playlist_index': 0,
1366             }
1367             ie_copy.update(dict(ie_result))
1368
1369             if self.params.get('writeinfojson', False):
1370                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1371                 if not self._ensure_dir_exists(encodeFilename(infofn)):
1372                     return
1373                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1374                     self.to_screen('[info] Playlist metadata is already present')
1375                 else:
1376                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1377                     try:
1378                         write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1379                     except (OSError, IOError):
1380                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1381
1382             # TODO: This should be passed to ThumbnailsConvertor if necessary
1383             self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1384
1385             if self.params.get('writedescription', False):
1386                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1387                 if not self._ensure_dir_exists(encodeFilename(descfn)):
1388                     return
1389                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1390                     self.to_screen('[info] Playlist description is already present')
1391                 elif ie_result.get('description') is None:
1392                     self.report_warning('There\'s no playlist description to write.')
1393                 else:
1394                     try:
1395                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1396                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1397                             descfile.write(ie_result['description'])
1398                     except (OSError, IOError):
1399                         self.report_error('Cannot write playlist description file ' + descfn)
1400                         return
1401
1402         # Save playlist_index before re-ordering
1403         entries = [
1404             ((playlistitems[i - 1] if playlistitems else i), entry)
1405             for i, entry in enumerate(entries, 1)]
1406
1407         if self.params.get('playlistreverse', False):
1408             entries = entries[::-1]
1409         if self.params.get('playlistrandom', False):
1410             random.shuffle(entries)
1411
1412         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1413
1414         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
1415         failures = 0
1416         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1417         for i, entry_tuple in enumerate(entries, 1):
1418             playlist_index, entry = entry_tuple
1419             if 'playlist_index' in self.params.get('compat_options', []):
1420                 playlist_index = playlistitems[i - 1] if playlistitems else i
1421             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1422             # This __x_forwarded_for_ip thing is a bit ugly but requires
1423             # minimal changes
1424             if x_forwarded_for:
1425                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1426             extra = {
1427                 'n_entries': n_entries,
1428                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1429                 'playlist_index': playlist_index,
1430                 'playlist_autonumber': i,
1431                 'playlist': playlist,
1432                 'playlist_id': ie_result.get('id'),
1433                 'playlist_title': ie_result.get('title'),
1434                 'playlist_uploader': ie_result.get('uploader'),
1435                 'playlist_uploader_id': ie_result.get('uploader_id'),
1436                 'extractor': ie_result['extractor'],
1437                 'webpage_url': ie_result['webpage_url'],
1438                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1439                 'extractor_key': ie_result['extractor_key'],
1440             }
1441
1442             if self._match_entry(entry, incomplete=True) is not None:
1443                 continue
1444
1445             entry_result = self.__process_iterable_entry(entry, download, extra)
1446             if not entry_result:
1447                 failures += 1
1448             if failures >= max_failures:
1449                 self.report_error(
1450                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1451                 break
1452             # TODO: skip failed (empty) entries?
1453             playlist_results.append(entry_result)
1454         ie_result['entries'] = playlist_results
1455         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1456         return ie_result
1457
1458     @__handle_extraction_exceptions
1459     def __process_iterable_entry(self, entry, download, extra_info):
1460         return self.process_ie_result(
1461             entry, download=download, extra_info=extra_info)
1462
1463     def _build_format_filter(self, filter_spec):
1464         " Returns a function to filter the formats according to the filter_spec "
1465
1466         OPERATORS = {
1467             '<': operator.lt,
1468             '<=': operator.le,
1469             '>': operator.gt,
1470             '>=': operator.ge,
1471             '=': operator.eq,
1472             '!=': operator.ne,
1473         }
1474         operator_rex = re.compile(r'''(?x)\s*
1475             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1476             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1477             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1478             $
1479             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1480         m = operator_rex.search(filter_spec)
1481         if m:
1482             try:
1483                 comparison_value = int(m.group('value'))
1484             except ValueError:
1485                 comparison_value = parse_filesize(m.group('value'))
1486                 if comparison_value is None:
1487                     comparison_value = parse_filesize(m.group('value') + 'B')
1488                 if comparison_value is None:
1489                     raise ValueError(
1490                         'Invalid value %r in format specification %r' % (
1491                             m.group('value'), filter_spec))
1492             op = OPERATORS[m.group('op')]
1493
1494         if not m:
1495             STR_OPERATORS = {
1496                 '=': operator.eq,
1497                 '^=': lambda attr, value: attr.startswith(value),
1498                 '$=': lambda attr, value: attr.endswith(value),
1499                 '*=': lambda attr, value: value in attr,
1500             }
1501             str_operator_rex = re.compile(r'''(?x)
1502                 \s*(?P<key>[a-zA-Z0-9._-]+)
1503                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1504                 \s*(?P<value>[a-zA-Z0-9._-]+)
1505                 \s*$
1506                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1507             m = str_operator_rex.search(filter_spec)
1508             if m:
1509                 comparison_value = m.group('value')
1510                 str_op = STR_OPERATORS[m.group('op')]
1511                 if m.group('negation'):
1512                     op = lambda attr, value: not str_op(attr, value)
1513                 else:
1514                     op = str_op
1515
1516         if not m:
1517             raise ValueError('Invalid filter specification %r' % filter_spec)
1518
1519         def _filter(f):
1520             actual_value = f.get(m.group('key'))
1521             if actual_value is None:
1522                 return m.group('none_inclusive')
1523             return op(actual_value, comparison_value)
1524         return _filter
1525
1526     def _default_format_spec(self, info_dict, download=True):
1527
1528         def can_merge():
1529             merger = FFmpegMergerPP(self)
1530             return merger.available and merger.can_merge()
1531
1532         prefer_best = (
1533             not self.params.get('simulate', False)
1534             and download
1535             and (
1536                 not can_merge()
1537                 or info_dict.get('is_live', False)
1538                 or self.outtmpl_dict['default'] == '-'))
1539         compat = (
1540             prefer_best
1541             or self.params.get('allow_multiple_audio_streams', False)
1542             or 'format-spec' in self.params.get('compat_opts', []))
1543
1544         return (
1545             'best/bestvideo+bestaudio' if prefer_best
1546             else 'bestvideo*+bestaudio/best' if not compat
1547             else 'bestvideo+bestaudio/best')
1548
1549     def build_format_selector(self, format_spec):
1550         def syntax_error(note, start):
1551             message = (
1552                 'Invalid format specification: '
1553                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1554             return SyntaxError(message)
1555
1556         PICKFIRST = 'PICKFIRST'
1557         MERGE = 'MERGE'
1558         SINGLE = 'SINGLE'
1559         GROUP = 'GROUP'
1560         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1561
1562         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1563                                   'video': self.params.get('allow_multiple_video_streams', False)}
1564
1565         check_formats = self.params.get('check_formats')
1566
1567         def _parse_filter(tokens):
1568             filter_parts = []
1569             for type, string, start, _, _ in tokens:
1570                 if type == tokenize.OP and string == ']':
1571                     return ''.join(filter_parts)
1572                 else:
1573                     filter_parts.append(string)
1574
1575         def _remove_unused_ops(tokens):
1576             # Remove operators that we don't use and join them with the surrounding strings
1577             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1578             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1579             last_string, last_start, last_end, last_line = None, None, None, None
1580             for type, string, start, end, line in tokens:
1581                 if type == tokenize.OP and string == '[':
1582                     if last_string:
1583                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1584                         last_string = None
1585                     yield type, string, start, end, line
1586                     # everything inside brackets will be handled by _parse_filter
1587                     for type, string, start, end, line in tokens:
1588                         yield type, string, start, end, line
1589                         if type == tokenize.OP and string == ']':
1590                             break
1591                 elif type == tokenize.OP and string in ALLOWED_OPS:
1592                     if last_string:
1593                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1594                         last_string = None
1595                     yield type, string, start, end, line
1596                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1597                     if not last_string:
1598                         last_string = string
1599                         last_start = start
1600                         last_end = end
1601                     else:
1602                         last_string += string
1603             if last_string:
1604                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1605
1606         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1607             selectors = []
1608             current_selector = None
1609             for type, string, start, _, _ in tokens:
1610                 # ENCODING is only defined in python 3.x
1611                 if type == getattr(tokenize, 'ENCODING', None):
1612                     continue
1613                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1614                     current_selector = FormatSelector(SINGLE, string, [])
1615                 elif type == tokenize.OP:
1616                     if string == ')':
1617                         if not inside_group:
1618                             # ')' will be handled by the parentheses group
1619                             tokens.restore_last_token()
1620                         break
1621                     elif inside_merge and string in ['/', ',']:
1622                         tokens.restore_last_token()
1623                         break
1624                     elif inside_choice and string == ',':
1625                         tokens.restore_last_token()
1626                         break
1627                     elif string == ',':
1628                         if not current_selector:
1629                             raise syntax_error('"," must follow a format selector', start)
1630                         selectors.append(current_selector)
1631                         current_selector = None
1632                     elif string == '/':
1633                         if not current_selector:
1634                             raise syntax_error('"/" must follow a format selector', start)
1635                         first_choice = current_selector
1636                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1637                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1638                     elif string == '[':
1639                         if not current_selector:
1640                             current_selector = FormatSelector(SINGLE, 'best', [])
1641                         format_filter = _parse_filter(tokens)
1642                         current_selector.filters.append(format_filter)
1643                     elif string == '(':
1644                         if current_selector:
1645                             raise syntax_error('Unexpected "("', start)
1646                         group = _parse_format_selection(tokens, inside_group=True)
1647                         current_selector = FormatSelector(GROUP, group, [])
1648                     elif string == '+':
1649                         if not current_selector:
1650                             raise syntax_error('Unexpected "+"', start)
1651                         selector_1 = current_selector
1652                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1653                         if not selector_2:
1654                             raise syntax_error('Expected a selector', start)
1655                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1656                     else:
1657                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1658                 elif type == tokenize.ENDMARKER:
1659                     break
1660             if current_selector:
1661                 selectors.append(current_selector)
1662             return selectors
1663
1664         def _merge(formats_pair):
1665             format_1, format_2 = formats_pair
1666
1667             formats_info = []
1668             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1669             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1670
1671             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1672                 get_no_more = {"video": False, "audio": False}
1673                 for (i, fmt_info) in enumerate(formats_info):
1674                     for aud_vid in ["audio", "video"]:
1675                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1676                             if get_no_more[aud_vid]:
1677                                 formats_info.pop(i)
1678                             get_no_more[aud_vid] = True
1679
1680             if len(formats_info) == 1:
1681                 return formats_info[0]
1682
1683             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1684             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1685
1686             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1687             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1688
1689             output_ext = self.params.get('merge_output_format')
1690             if not output_ext:
1691                 if the_only_video:
1692                     output_ext = the_only_video['ext']
1693                 elif the_only_audio and not video_fmts:
1694                     output_ext = the_only_audio['ext']
1695                 else:
1696                     output_ext = 'mkv'
1697
1698             new_dict = {
1699                 'requested_formats': formats_info,
1700                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1701                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1702                 'ext': output_ext,
1703             }
1704
1705             if the_only_video:
1706                 new_dict.update({
1707                     'width': the_only_video.get('width'),
1708                     'height': the_only_video.get('height'),
1709                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1710                     'fps': the_only_video.get('fps'),
1711                     'vcodec': the_only_video.get('vcodec'),
1712                     'vbr': the_only_video.get('vbr'),
1713                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1714                 })
1715
1716             if the_only_audio:
1717                 new_dict.update({
1718                     'acodec': the_only_audio.get('acodec'),
1719                     'abr': the_only_audio.get('abr'),
1720                 })
1721
1722             return new_dict
1723
1724         def _check_formats(formats):
1725             for f in formats:
1726                 self.to_screen('[info] Testing format %s' % f['format_id'])
1727                 paths = self.params.get('paths', {})
1728                 temp_file = os.path.join(
1729                     expand_path(paths.get('home', '').strip()),
1730                     expand_path(paths.get('temp', '').strip()),
1731                     'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
1732                 try:
1733                     dl, _ = self.dl(temp_file, f, test=True)
1734                 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
1735                     dl = False
1736                 finally:
1737                     if os.path.exists(temp_file):
1738                         os.remove(temp_file)
1739                 if dl:
1740                     yield f
1741                 else:
1742                     self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1743
1744         def _build_selector_function(selector):
1745             if isinstance(selector, list):  # ,
1746                 fs = [_build_selector_function(s) for s in selector]
1747
1748                 def selector_function(ctx):
1749                     for f in fs:
1750                         for format in f(ctx):
1751                             yield format
1752                 return selector_function
1753
1754             elif selector.type == GROUP:  # ()
1755                 selector_function = _build_selector_function(selector.selector)
1756
1757             elif selector.type == PICKFIRST:  # /
1758                 fs = [_build_selector_function(s) for s in selector.selector]
1759
1760                 def selector_function(ctx):
1761                     for f in fs:
1762                         picked_formats = list(f(ctx))
1763                         if picked_formats:
1764                             return picked_formats
1765                     return []
1766
1767             elif selector.type == SINGLE:  # atom
1768                 format_spec = selector.selector or 'best'
1769
1770                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1771                 if format_spec == 'all':
1772                     def selector_function(ctx):
1773                         formats = list(ctx['formats'])
1774                         if check_formats:
1775                             formats = _check_formats(formats)
1776                         for f in formats:
1777                             yield f
1778                 elif format_spec == 'mergeall':
1779                     def selector_function(ctx):
1780                         formats = list(_check_formats(ctx['formats']))
1781                         if not formats:
1782                             return
1783                         merged_format = formats[-1]
1784                         for f in formats[-2::-1]:
1785                             merged_format = _merge((merged_format, f))
1786                         yield merged_format
1787
1788                 else:
1789                     format_fallback, format_reverse, format_idx = False, True, 1
1790                     mobj = re.match(
1791                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1792                         format_spec)
1793                     if mobj is not None:
1794                         format_idx = int_or_none(mobj.group('n'), default=1)
1795                         format_reverse = mobj.group('bw')[0] == 'b'
1796                         format_type = (mobj.group('type') or [None])[0]
1797                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1798                         format_modified = mobj.group('mod') is not None
1799
1800                         format_fallback = not format_type and not format_modified  # for b, w
1801                         filter_f = (
1802                             (lambda f: f.get('%scodec' % format_type) != 'none')
1803                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1804                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1805                             if format_type  # bv, ba, wv, wa
1806                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1807                             if not format_modified  # b, w
1808                             else None)  # b*, w*
1809                     else:
1810                         filter_f = ((lambda f: f.get('ext') == format_spec)
1811                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1812                                     else (lambda f: f.get('format_id') == format_spec))  # id
1813
1814                     def selector_function(ctx):
1815                         formats = list(ctx['formats'])
1816                         if not formats:
1817                             return
1818                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1819                         if format_fallback and ctx['incomplete_formats'] and not matches:
1820                             # for extractors with incomplete formats (audio only (soundcloud)
1821                             # or video only (imgur)) best/worst will fallback to
1822                             # best/worst {video,audio}-only format
1823                             matches = formats
1824                         if format_reverse:
1825                             matches = matches[::-1]
1826                         if check_formats:
1827                             matches = list(itertools.islice(_check_formats(matches), format_idx))
1828                         n = len(matches)
1829                         if -n <= format_idx - 1 < n:
1830                             yield matches[format_idx - 1]
1831
1832             elif selector.type == MERGE:        # +
1833                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1834
1835                 def selector_function(ctx):
1836                     for pair in itertools.product(
1837                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1838                         yield _merge(pair)
1839
1840             filters = [self._build_format_filter(f) for f in selector.filters]
1841
1842             def final_selector(ctx):
1843                 ctx_copy = copy.deepcopy(ctx)
1844                 for _filter in filters:
1845                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1846                 return selector_function(ctx_copy)
1847             return final_selector
1848
1849         stream = io.BytesIO(format_spec.encode('utf-8'))
1850         try:
1851             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1852         except tokenize.TokenError:
1853             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1854
1855         class TokenIterator(object):
1856             def __init__(self, tokens):
1857                 self.tokens = tokens
1858                 self.counter = 0
1859
1860             def __iter__(self):
1861                 return self
1862
1863             def __next__(self):
1864                 if self.counter >= len(self.tokens):
1865                     raise StopIteration()
1866                 value = self.tokens[self.counter]
1867                 self.counter += 1
1868                 return value
1869
1870             next = __next__
1871
1872             def restore_last_token(self):
1873                 self.counter -= 1
1874
1875         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1876         return _build_selector_function(parsed_selector)
1877
1878     def _calc_headers(self, info_dict):
1879         res = std_headers.copy()
1880
1881         add_headers = info_dict.get('http_headers')
1882         if add_headers:
1883             res.update(add_headers)
1884
1885         cookies = self._calc_cookies(info_dict)
1886         if cookies:
1887             res['Cookie'] = cookies
1888
1889         if 'X-Forwarded-For' not in res:
1890             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1891             if x_forwarded_for_ip:
1892                 res['X-Forwarded-For'] = x_forwarded_for_ip
1893
1894         return res
1895
1896     def _calc_cookies(self, info_dict):
1897         pr = sanitized_Request(info_dict['url'])
1898         self.cookiejar.add_cookie_header(pr)
1899         return pr.get_header('Cookie')
1900
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video result, select the requested formats and optionally download them.

        info_dict is normalized in place: string/numeric fields are coerced,
        thumbnails are sorted and given ids, missing dates are derived from
        timestamps, subtitle and format entries get sanitized URLs, ids,
        extensions, protocols and HTTP headers.

        The listing params ('list_thumbnails', 'listsubtitles', 'listformats')
        print their listing and return early with None.

        Otherwise the format selector built from the 'format' param (or the
        default format spec) is run; when download is true, process_info is
        called once per selected format.

        Returns info_dict, updated with the last selected format (if any).

        Raises ExtractorError if 'id' or 'title' is missing; if there are no
        formats or the requested format is unavailable (both only a warning
        when the 'ignore_no_formats_error' param is set); or if 'listformats'
        is requested and there are no formats.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # Warn that a field had the wrong type and is being coerced
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce info[string_field] to compat_str (with a warning) unless it
            # is None or already a string
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every field listed in self._NUMERIC_FIELDS via int_or_none
            # (with a warning) unless it is None or already numeric
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a lone 'thumbnail' URL to a one-element 'thumbnails' list
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort ascending by (preference, width, height, id, url); missing
            # values sort first, so the last entry is the one used as the
            # default 'thumbnail' below
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '',
                t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                # Thumbnails without an id get their (post-sort) index as id
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            # Listing only: nothing else is processed and None is returned
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # Fall back to the last (highest sorted) entry of 'thumbnails'
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive the missing YYYYMMDD date fields from their timestamps
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    # Best effort: leave the date field unset on failure
                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in a missing 'ext' from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        # NOTE(review): this indexes 'url' directly, so an entry
                        # with neither 'ext' nor 'url' raises KeyError here
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            # Listing only: nothing else is processed and None is returned
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available: info_dict itself acts as the
            # format, so the per-format fixes below modify info_dict directly
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('No video formats found!')
            else:
                self.report_warning('No video formats found!')

        def is_wellformed(f):
            # A format is usable only if it has a non-empty 'url'; a bytes URL
            # is converted to a string (with a warning)
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the formats carrying it, to detect duplicates
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                # Fall back to the position in the list as the id
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                # Disambiguate by appending "-<index>" to every duplicate
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            # Build a human readable 'format' description if missing
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            # (computed from the video-level info overlaid with this format)
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats and formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict lists them,
            # otherwise we end up with a circular reference: the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            # Listing only: nothing is selected or downloaded and None is returned
            if not info_dict.get('formats'):
                raise ExtractorError('No video formats found', expected=True)
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True)
            else:
                self.report_warning('Requested format is not available')
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                # Each format is processed with its own copy of the video info
                # overlaid with the format's fields
                new_info = dict(info_dict)
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2150
2151     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2152         """Select the requested subtitles and their format"""
2153         available_subs = {}
2154         if normal_subtitles and self.params.get('writesubtitles'):
2155             available_subs.update(normal_subtitles)
2156         if automatic_captions and self.params.get('writeautomaticsub'):
2157             for lang, cap_info in automatic_captions.items():
2158                 if lang not in available_subs:
2159                     available_subs[lang] = cap_info
2160
2161         if (not self.params.get('writesubtitles') and not
2162                 self.params.get('writeautomaticsub') or not
2163                 available_subs):
2164             return None
2165
2166         all_sub_langs = available_subs.keys()
2167         if self.params.get('allsubtitles', False):
2168             requested_langs = all_sub_langs
2169         elif self.params.get('subtitleslangs', False):
2170             requested_langs = set()
2171             for lang in self.params.get('subtitleslangs'):
2172                 if lang == 'all':
2173                     requested_langs.update(all_sub_langs)
2174                     continue
2175                 discard = lang[0] == '-'
2176                 if discard:
2177                     lang = lang[1:]
2178                 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2179                 if discard:
2180                     for lang in current_langs:
2181                         requested_langs.discard(lang)
2182                 else:
2183                     requested_langs.update(current_langs)
2184         elif 'en' in available_subs:
2185             requested_langs = ['en']
2186         else:
2187             requested_langs = [list(all_sub_langs)[0]]
2188         self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2189
2190         formats_query = self.params.get('subtitlesformat', 'best')
2191         formats_preference = formats_query.split('/') if formats_query else []
2192         subs = {}
2193         for lang in requested_langs:
2194             formats = available_subs.get(lang)
2195             if formats is None:
2196                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2197                 continue
2198             for ext in formats_preference:
2199                 if ext == 'best':
2200                     f = formats[-1]
2201                     break
2202                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2203                 if matches:
2204                     f = matches[-1]
2205                     break
2206             else:
2207                 f = formats[-1]
2208                 self.report_warning(
2209                     'No subtitle format found matching "%s" for language %s, '
2210                     'using %s' % (formats_query, lang, f['ext']))
2211             subs[lang] = f
2212         return subs
2213
2214     def __forced_printings(self, info_dict, filename, incomplete):
2215         def print_mandatory(field, actual_field=None):
2216             if actual_field is None:
2217                 actual_field = field
2218             if (self.params.get('force%s' % field, False)
2219                     and (not incomplete or info_dict.get(actual_field) is not None)):
2220                 self.to_stdout(info_dict[actual_field])
2221
2222         def print_optional(field):
2223             if (self.params.get('force%s' % field, False)
2224                     and info_dict.get(field) is not None):
2225                 self.to_stdout(info_dict[field])
2226
2227         info_dict = info_dict.copy()
2228         if filename is not None:
2229             info_dict['filename'] = filename
2230         if info_dict.get('requested_formats') is not None:
2231             # For RTMP URLs, also include the playpath
2232             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2233         elif 'url' in info_dict:
2234             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2235
2236         for tmpl in self.params.get('forceprint', []):
2237             if re.match(r'\w+$', tmpl):
2238                 tmpl = '%({})s'.format(tmpl)
2239             tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2240             self.to_stdout(tmpl % info_copy)
2241
2242         print_mandatory('title')
2243         print_mandatory('id')
2244         print_mandatory('url', 'urls')
2245         print_optional('thumbnail')
2246         print_optional('description')
2247         print_optional('filename')
2248         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2249             self.to_stdout(formatSeconds(info_dict['duration']))
2250         print_mandatory('format')
2251
2252         if self.params.get('forcejson', False):
2253             self.post_extract(info_dict)
2254             self.to_stdout(json.dumps(info_dict, default=repr))
2255
2256     def dl(self, name, info, subtitle=False, test=False):
2257
2258         if test:
2259             verbose = self.params.get('verbose')
2260             params = {
2261                 'test': True,
2262                 'quiet': not verbose,
2263                 'verbose': verbose,
2264                 'noprogress': not verbose,
2265                 'nopart': True,
2266                 'skip_unavailable_fragments': False,
2267                 'keep_fragments': False,
2268                 'overwrites': True,
2269                 '_no_ytdl_file': True,
2270             }
2271         else:
2272             params = self.params
2273         fd = get_suitable_downloader(info, params)(self, params)
2274         if not test:
2275             for ph in self._progress_hooks:
2276                 fd.add_progress_hook(ph)
2277             self.write_debug('Invoking downloader on %r' % info.get('url'))
2278         new_info = dict(info)
2279         if new_info.get('http_headers') is None:
2280             new_info['http_headers'] = self._calc_headers(new_info)
2281         return fd.download(name, new_info, subtitle)
2282
2283     def process_info(self, info_dict):
2284         """Process a single resolved IE result."""
2285
2286         assert info_dict.get('_type', 'video') == 'video'
2287
2288         info_dict.setdefault('__postprocessors', [])
2289
2290         max_downloads = self.params.get('max_downloads')
2291         if max_downloads is not None:
2292             if self._num_downloads >= int(max_downloads):
2293                 raise MaxDownloadsReached()
2294
2295         # TODO: backward compatibility, to be removed
2296         info_dict['fulltitle'] = info_dict['title']
2297
2298         if 'format' not in info_dict:
2299             info_dict['format'] = info_dict['ext']
2300
2301         if self._match_entry(info_dict, incomplete=False) is not None:
2302             return
2303
2304         self.post_extract(info_dict)
2305         self._num_downloads += 1
2306
2307         info_dict, _ = self.pre_process(info_dict)
2308
2309         # info_dict['_filename'] needs to be set for backward compatibility
2310         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2311         temp_filename = self.prepare_filename(info_dict, 'temp')
2312         files_to_move = {}
2313
2314         # Forced printings
2315         self.__forced_printings(info_dict, full_filename, incomplete=False)
2316
2317         if self.params.get('simulate', False):
2318             if self.params.get('force_write_download_archive', False):
2319                 self.record_download_archive(info_dict)
2320
2321             # Do nothing else if in simulate mode
2322             return
2323
2324         if full_filename is None:
2325             return
2326
2327         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2328             return
2329         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2330             return
2331
2332         if self.params.get('writedescription', False):
2333             descfn = self.prepare_filename(info_dict, 'description')
2334             if not self._ensure_dir_exists(encodeFilename(descfn)):
2335                 return
2336             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2337                 self.to_screen('[info] Video description is already present')
2338             elif info_dict.get('description') is None:
2339                 self.report_warning('There\'s no description to write.')
2340             else:
2341                 try:
2342                     self.to_screen('[info] Writing video description to: ' + descfn)
2343                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2344                         descfile.write(info_dict['description'])
2345                 except (OSError, IOError):
2346                     self.report_error('Cannot write description file ' + descfn)
2347                     return
2348
2349         if self.params.get('writeannotations', False):
2350             annofn = self.prepare_filename(info_dict, 'annotation')
2351             if not self._ensure_dir_exists(encodeFilename(annofn)):
2352                 return
2353             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2354                 self.to_screen('[info] Video annotations are already present')
2355             elif not info_dict.get('annotations'):
2356                 self.report_warning('There are no annotations to write.')
2357             else:
2358                 try:
2359                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2360                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2361                         annofile.write(info_dict['annotations'])
2362                 except (KeyError, TypeError):
2363                     self.report_warning('There are no annotations to write.')
2364                 except (OSError, IOError):
2365                     self.report_error('Cannot write annotations file: ' + annofn)
2366                     return
2367
2368         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2369                                        self.params.get('writeautomaticsub')])
2370
2371         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2372             # subtitles download errors are already managed as troubles in relevant IE
2373             # that way it will silently go on when used with unsupporting IE
2374             subtitles = info_dict['requested_subtitles']
2375             # ie = self.get_info_extractor(info_dict['extractor_key'])
2376             for sub_lang, sub_info in subtitles.items():
2377                 sub_format = sub_info['ext']
2378                 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2379                 sub_filename_final = subtitles_filename(
2380                     self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2381                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2382                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2383                     sub_info['filepath'] = sub_filename
2384                     files_to_move[sub_filename] = sub_filename_final
2385                 else:
2386                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2387                     if sub_info.get('data') is not None:
2388                         try:
2389                             # Use newline='' to prevent conversion of newline characters
2390                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2391                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2392                                 subfile.write(sub_info['data'])
2393                             sub_info['filepath'] = sub_filename
2394                             files_to_move[sub_filename] = sub_filename_final
2395                         except (OSError, IOError):
2396                             self.report_error('Cannot write subtitles file ' + sub_filename)
2397                             return
2398                     else:
2399                         try:
2400                             self.dl(sub_filename, sub_info.copy(), subtitle=True)
2401                             sub_info['filepath'] = sub_filename
2402                             files_to_move[sub_filename] = sub_filename_final
2403                         except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2404                             self.report_warning('Unable to download subtitle for "%s": %s' %
2405                                                 (sub_lang, error_to_compat_str(err)))
2406                             continue
2407
2408         if self.params.get('writeinfojson', False):
2409             infofn = self.prepare_filename(info_dict, 'infojson')
2410             if not self._ensure_dir_exists(encodeFilename(infofn)):
2411                 return
2412             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2413                 self.to_screen('[info] Video metadata is already present')
2414             else:
2415                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2416                 try:
2417                     write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2418                 except (OSError, IOError):
2419                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2420                     return
2421             info_dict['__infojson_filename'] = infofn
2422
2423         for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2424             thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2425             thumb_filename = replace_extension(
2426                 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2427             files_to_move[thumb_filename_temp] = thumb_filename
2428
2429         # Write internet shortcut files
2430         url_link = webloc_link = desktop_link = False
2431         if self.params.get('writelink', False):
2432             if sys.platform == "darwin":  # macOS.
2433                 webloc_link = True
2434             elif sys.platform.startswith("linux"):
2435                 desktop_link = True
2436             else:  # if sys.platform in ['win32', 'cygwin']:
2437                 url_link = True
2438         if self.params.get('writeurllink', False):
2439             url_link = True
2440         if self.params.get('writewebloclink', False):
2441             webloc_link = True
2442         if self.params.get('writedesktoplink', False):
2443             desktop_link = True
2444
2445         if url_link or webloc_link or desktop_link:
2446             if 'webpage_url' not in info_dict:
2447                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2448                 return
2449             ascii_url = iri_to_uri(info_dict['webpage_url'])
2450
2451         def _write_link_file(extension, template, newline, embed_filename):
2452             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2453             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2454                 self.to_screen('[info] Internet shortcut is already present')
2455             else:
2456                 try:
2457                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2458                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2459                         template_vars = {'url': ascii_url}
2460                         if embed_filename:
2461                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2462                         linkfile.write(template % template_vars)
2463                 except (OSError, IOError):
2464                     self.report_error('Cannot write internet shortcut ' + linkfn)
2465                     return False
2466             return True
2467
2468         if url_link:
2469             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2470                 return
2471         if webloc_link:
2472             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2473                 return
2474         if desktop_link:
2475             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2476                 return
2477
2478         try:
2479             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2480         except PostProcessingError as err:
2481             self.report_error('Preprocessing: %s' % str(err))
2482             return
2483
2484         must_record_download_archive = False
2485         if self.params.get('skip_download', False):
2486             info_dict['filepath'] = temp_filename
2487             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2488             info_dict['__files_to_move'] = files_to_move
2489             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2490         else:
2491             # Download
2492             try:
2493
2494                 def existing_file(*filepaths):
2495                     ext = info_dict.get('ext')
2496                     final_ext = self.params.get('final_ext', ext)
2497                     existing_files = []
2498                     for file in orderedSet(filepaths):
2499                         if final_ext != ext:
2500                             converted = replace_extension(file, final_ext, ext)
2501                             if os.path.exists(encodeFilename(converted)):
2502                                 existing_files.append(converted)
2503                         if os.path.exists(encodeFilename(file)):
2504                             existing_files.append(file)
2505
2506                     if not existing_files or self.params.get('overwrites', False):
2507                         for file in orderedSet(existing_files):
2508                             self.report_file_delete(file)
2509                             os.remove(encodeFilename(file))
2510                         return None
2511
2512                     self.report_file_already_downloaded(existing_files[0])
2513                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2514                     return existing_files[0]
2515
2516                 success = True
2517                 if info_dict.get('requested_formats') is not None:
2518                     downloaded = []
2519                     merger = FFmpegMergerPP(self)
2520                     if self.params.get('allow_unplayable_formats'):
2521                         self.report_warning(
2522                             'You have requested merging of multiple formats '
2523                             'while also allowing unplayable formats to be downloaded. '
2524                             'The formats won\'t be merged to prevent data corruption.')
2525                     elif not merger.available:
2526                         self.report_warning(
2527                             'You have requested merging of multiple formats but ffmpeg is not installed. '
2528                             'The formats won\'t be merged.')
2529
2530                     def compatible_formats(formats):
2531                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2532                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2533                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2534                         if len(video_formats) > 2 or len(audio_formats) > 2:
2535                             return False
2536
2537                         # Check extension
2538                         exts = set(format.get('ext') for format in formats)
2539                         COMPATIBLE_EXTS = (
2540                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2541                             set(('webm',)),
2542                         )
2543                         for ext_sets in COMPATIBLE_EXTS:
2544                             if ext_sets.issuperset(exts):
2545                                 return True
2546                         # TODO: Check acodec/vcodec
2547                         return False
2548
2549                     requested_formats = info_dict['requested_formats']
2550                     old_ext = info_dict['ext']
2551                     if self.params.get('merge_output_format') is None:
2552                         if not compatible_formats(requested_formats):
2553                             info_dict['ext'] = 'mkv'
2554                             self.report_warning(
2555                                 'Requested formats are incompatible for merge and will be merged into mkv.')
2556                         if (info_dict['ext'] == 'webm'
2557                                 and self.params.get('writethumbnail', False)
2558                                 and info_dict.get('thumbnails')):
2559                             info_dict['ext'] = 'mkv'
2560                             self.report_warning(
2561                                 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
2562
2563                     def correct_ext(filename):
2564                         filename_real_ext = os.path.splitext(filename)[1][1:]
2565                         filename_wo_ext = (
2566                             os.path.splitext(filename)[0]
2567                             if filename_real_ext == old_ext
2568                             else filename)
2569                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2570
2571                     # Ensure filename always has a correct extension for successful merge
2572                     full_filename = correct_ext(full_filename)
2573                     temp_filename = correct_ext(temp_filename)
2574                     dl_filename = existing_file(full_filename, temp_filename)
2575                     info_dict['__real_download'] = False
2576                     if dl_filename is None:
2577                         for f in requested_formats:
2578                             new_info = dict(info_dict)
2579                             new_info.update(f)
2580                             fname = prepend_extension(
2581                                 self.prepare_filename(new_info, 'temp'),
2582                                 'f%s' % f['format_id'], new_info['ext'])
2583                             if not self._ensure_dir_exists(fname):
2584                                 return
2585                             downloaded.append(fname)
2586                             partial_success, real_download = self.dl(fname, new_info)
2587                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2588                             success = success and partial_success
2589                         if merger.available and not self.params.get('allow_unplayable_formats'):
2590                             info_dict['__postprocessors'].append(merger)
2591                             info_dict['__files_to_merge'] = downloaded
2592                             # Even if there were no downloads, it is being merged only now
2593                             info_dict['__real_download'] = True
2594                         else:
2595                             for file in downloaded:
2596                                 files_to_move[file] = None
2597                 else:
2598                     # Just a single file
2599                     dl_filename = existing_file(full_filename, temp_filename)
2600                     if dl_filename is None:
2601                         success, real_download = self.dl(temp_filename, info_dict)
2602                         info_dict['__real_download'] = real_download
2603
2604                 dl_filename = dl_filename or temp_filename
2605                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2606
2607             except network_exceptions as err:
2608                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2609                 return
2610             except (OSError, IOError) as err:
2611                 raise UnavailableVideoError(err)
2612             except (ContentTooShortError, ) as err:
2613                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2614                 return
2615
2616             if success and full_filename != '-':
2617                 # Fixup content
2618                 fixup_policy = self.params.get('fixup')
2619                 if fixup_policy is None:
2620                     fixup_policy = 'detect_or_warn'
2621
2622                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2623
2624                 stretched_ratio = info_dict.get('stretched_ratio')
2625                 if stretched_ratio is not None and stretched_ratio != 1:
2626                     if fixup_policy == 'warn':
2627                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2628                             info_dict['id'], stretched_ratio))
2629                     elif fixup_policy == 'detect_or_warn':
2630                         stretched_pp = FFmpegFixupStretchedPP(self)
2631                         if stretched_pp.available:
2632                             info_dict['__postprocessors'].append(stretched_pp)
2633                         else:
2634                             self.report_warning(
2635                                 '%s: Non-uniform pixel ratio (%s). %s'
2636                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2637                     else:
2638                         assert fixup_policy in ('ignore', 'never')
2639
2640                 if (info_dict.get('requested_formats') is None
2641                         and info_dict.get('container') == 'm4a_dash'
2642                         and info_dict.get('ext') == 'm4a'):
2643                     if fixup_policy == 'warn':
2644                         self.report_warning(
2645                             '%s: writing DASH m4a. '
2646                             'Only some players support this container.'
2647                             % info_dict['id'])
2648                     elif fixup_policy == 'detect_or_warn':
2649                         fixup_pp = FFmpegFixupM4aPP(self)
2650                         if fixup_pp.available:
2651                             info_dict['__postprocessors'].append(fixup_pp)
2652                         else:
2653                             self.report_warning(
2654                                 '%s: writing DASH m4a. '
2655                                 'Only some players support this container. %s'
2656                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2657                     else:
2658                         assert fixup_policy in ('ignore', 'never')
2659
2660                 if ('protocol' in info_dict
2661                         and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
2662                     if fixup_policy == 'warn':
2663                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2664                             info_dict['id']))
2665                     elif fixup_policy == 'detect_or_warn':
2666                         fixup_pp = FFmpegFixupM3u8PP(self)
2667                         if fixup_pp.available:
2668                             info_dict['__postprocessors'].append(fixup_pp)
2669                         else:
2670                             self.report_warning(
2671                                 '%s: malformed AAC bitstream detected. %s'
2672                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2673                     else:
2674                         assert fixup_policy in ('ignore', 'never')
2675
2676                 try:
2677                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2678                 except PostProcessingError as err:
2679                     self.report_error('Postprocessing: %s' % str(err))
2680                     return
2681                 try:
2682                     for ph in self._post_hooks:
2683                         ph(info_dict['filepath'])
2684                 except Exception as err:
2685                     self.report_error('post hooks: %s' % str(err))
2686                     return
2687                 must_record_download_archive = True
2688
2689         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2690             self.record_download_archive(info_dict)
2691         max_downloads = self.params.get('max_downloads')
2692         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2693             raise MaxDownloadsReached()
2694
2695     def download(self, url_list):
2696         """Download a given list of URLs."""
2697         outtmpl = self.outtmpl_dict['default']
2698         if (len(url_list) > 1
2699                 and outtmpl != '-'
2700                 and '%' not in outtmpl
2701                 and self.params.get('max_downloads') != 1):
2702             raise SameFileError(outtmpl)
2703
2704         for url in url_list:
2705             try:
2706                 # It also downloads the videos
2707                 res = self.extract_info(
2708                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2709             except UnavailableVideoError:
2710                 self.report_error('unable to download video')
2711             except MaxDownloadsReached:
2712                 self.to_screen('[info] Maximum number of downloaded files reached')
2713                 raise
2714             except ExistingVideoReached:
2715                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2716                 raise
2717             except RejectedVideoReached:
2718                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2719                 raise
2720             else:
2721                 if self.params.get('dump_single_json', False):
2722                     self.post_extract(res)
2723                     self.to_stdout(json.dumps(res, default=repr))
2724
2725         return self._download_retcode
2726
2727     def download_with_info_file(self, info_filename):
2728         with contextlib.closing(fileinput.FileInput(
2729                 [info_filename], mode='r',
2730                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2731             # FileInput doesn't have a read method, we can't call json.load
2732             info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2733         try:
2734             self.process_ie_result(info, download=True)
2735         except (DownloadError, EntryNotInPlaylist):
2736             webpage_url = info.get('webpage_url')
2737             if webpage_url is not None:
2738                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2739                 return self.download([webpage_url])
2740             else:
2741                 raise
2742         return self._download_retcode
2743
2744     @staticmethod
2745     def filter_requested_info(info_dict, actually_filter=True):
2746         if not actually_filter:
2747             info_dict['epoch'] = int(time.time())
2748             return info_dict
2749         exceptions = {
2750             'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
2751             'keep': ['_type'],
2752         }
2753         keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
2754         filter_fn = lambda obj: (
2755             list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
2756             else obj if not isinstance(obj, dict)
2757             else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))
2758         return filter_fn(info_dict)
2759
2760     def run_pp(self, pp, infodict):
2761         files_to_delete = []
2762         if '__files_to_move' not in infodict:
2763             infodict['__files_to_move'] = {}
2764         files_to_delete, infodict = pp.run(infodict)
2765         if not files_to_delete:
2766             return infodict
2767
2768         if self.params.get('keepvideo', False):
2769             for f in files_to_delete:
2770                 infodict['__files_to_move'].setdefault(f, '')
2771         else:
2772             for old_filename in set(files_to_delete):
2773                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2774                 try:
2775                     os.remove(encodeFilename(old_filename))
2776                 except (IOError, OSError):
2777                     self.report_warning('Unable to remove downloaded original file')
2778                 if old_filename in infodict['__files_to_move']:
2779                     del infodict['__files_to_move'][old_filename]
2780         return infodict
2781
2782     @staticmethod
2783     def post_extract(info_dict):
2784         def actual_post_extract(info_dict):
2785             if info_dict.get('_type') in ('playlist', 'multi_video'):
2786                 for video_dict in info_dict.get('entries', {}):
2787                     actual_post_extract(video_dict or {})
2788                 return
2789
2790             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2791             info_dict.update(post_extractor().items())
2792             info_dict.pop('__post_extractor', None)
2793
2794         actual_post_extract(info_dict or {})
2795
2796     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2797         info = dict(ie_info)
2798         info['__files_to_move'] = files_to_move or {}
2799         for pp in self._pps[key]:
2800             info = self.run_pp(pp, info)
2801         return info, info.pop('__files_to_move', None)
2802
2803     def post_process(self, filename, ie_info, files_to_move=None):
2804         """Run all the postprocessors on the given file."""
2805         info = dict(ie_info)
2806         info['filepath'] = filename
2807         info['__files_to_move'] = files_to_move or {}
2808
2809         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2810             info = self.run_pp(pp, info)
2811         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2812         del info['__files_to_move']
2813         for pp in self._pps['after_move']:
2814             info = self.run_pp(pp, info)
2815         return info
2816
2817     def _make_archive_id(self, info_dict):
2818         video_id = info_dict.get('id')
2819         if not video_id:
2820             return
2821         # Future-proof against any change in case
2822         # and backwards compatibility with prior versions
2823         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2824         if extractor is None:
2825             url = str_or_none(info_dict.get('url'))
2826             if not url:
2827                 return
2828             # Try to find matching extractor for the URL and take its ie_key
2829             for ie in self._ies:
2830                 if ie.suitable(url):
2831                     extractor = ie.ie_key()
2832                     break
2833             else:
2834                 return
2835         return '%s %s' % (extractor.lower(), video_id)
2836
2837     def in_download_archive(self, info_dict):
2838         fn = self.params.get('download_archive')
2839         if fn is None:
2840             return False
2841
2842         vid_id = self._make_archive_id(info_dict)
2843         if not vid_id:
2844             return False  # Incomplete video information
2845
2846         return vid_id in self.archive
2847
2848     def record_download_archive(self, info_dict):
2849         fn = self.params.get('download_archive')
2850         if fn is None:
2851             return
2852         vid_id = self._make_archive_id(info_dict)
2853         assert vid_id
2854         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2855             archive_file.write(vid_id + '\n')
2856         self.archive.add(vid_id)
2857
2858     @staticmethod
2859     def format_resolution(format, default='unknown'):
2860         if format.get('vcodec') == 'none':
2861             return 'audio only'
2862         if format.get('resolution') is not None:
2863             return format['resolution']
2864         if format.get('width') and format.get('height'):
2865             res = '%dx%d' % (format['width'], format['height'])
2866         elif format.get('height'):
2867             res = '%sp' % format['height']
2868         elif format.get('width'):
2869             res = '%dx?' % format['width']
2870         else:
2871             res = default
2872         return res
2873
2874     def _format_note(self, fdict):
2875         res = ''
2876         if fdict.get('ext') in ['f4f', 'f4m']:
2877             res += '(unsupported) '
2878         if fdict.get('language'):
2879             if res:
2880                 res += ' '
2881             res += '[%s] ' % fdict['language']
2882         if fdict.get('format_note') is not None:
2883             res += fdict['format_note'] + ' '
2884         if fdict.get('tbr') is not None:
2885             res += '%4dk ' % fdict['tbr']
2886         if fdict.get('container') is not None:
2887             if res:
2888                 res += ', '
2889             res += '%s container' % fdict['container']
2890         if (fdict.get('vcodec') is not None
2891                 and fdict.get('vcodec') != 'none'):
2892             if res:
2893                 res += ', '
2894             res += fdict['vcodec']
2895             if fdict.get('vbr') is not None:
2896                 res += '@'
2897         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2898             res += 'video@'
2899         if fdict.get('vbr') is not None:
2900             res += '%4dk' % fdict['vbr']
2901         if fdict.get('fps') is not None:
2902             if res:
2903                 res += ', '
2904             res += '%sfps' % fdict['fps']
2905         if fdict.get('acodec') is not None:
2906             if res:
2907                 res += ', '
2908             if fdict['acodec'] == 'none':
2909                 res += 'video only'
2910             else:
2911                 res += '%-5s' % fdict['acodec']
2912         elif fdict.get('abr') is not None:
2913             if res:
2914                 res += ', '
2915             res += 'audio'
2916         if fdict.get('abr') is not None:
2917             res += '@%3dk' % fdict['abr']
2918         if fdict.get('asr') is not None:
2919             res += ' (%5dHz)' % fdict['asr']
2920         if fdict.get('filesize') is not None:
2921             if res:
2922                 res += ', '
2923             res += format_bytes(fdict['filesize'])
2924         elif fdict.get('filesize_approx') is not None:
2925             if res:
2926                 res += ', '
2927             res += '~' + format_bytes(fdict['filesize_approx'])
2928         return res
2929
2930     def _format_note_table(self, f):
2931         def join_fields(*vargs):
2932             return ', '.join((val for val in vargs if val != ''))
2933
2934         return join_fields(
2935             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2936             format_field(f, 'language', '[%s]'),
2937             format_field(f, 'format_note'),
2938             format_field(f, 'container', ignore=(None, f.get('ext'))),
2939             format_field(f, 'asr', '%5dHz'))
2940
2941     def list_formats(self, info_dict):
2942         formats = info_dict.get('formats', [info_dict])
2943         new_format = (
2944             'list-formats' not in self.params.get('compat_opts', [])
2945             and self.params.get('list_formats_as_table', True) is not False)
2946         if new_format:
2947             table = [
2948                 [
2949                     format_field(f, 'format_id'),
2950                     format_field(f, 'ext'),
2951                     self.format_resolution(f),
2952                     format_field(f, 'fps', '%d'),
2953                     '|',
2954                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2955                     format_field(f, 'tbr', '%4dk'),
2956                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
2957                     '|',
2958                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
2959                     format_field(f, 'vbr', '%4dk'),
2960                     format_field(f, 'acodec', default='unknown').replace('none', ''),
2961                     format_field(f, 'abr', '%3dk'),
2962                     format_field(f, 'asr', '%5dHz'),
2963                     self._format_note_table(f)]
2964                 for f in formats
2965                 if f.get('preference') is None or f['preference'] >= -1000]
2966             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
2967                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2968         else:
2969             table = [
2970                 [
2971                     format_field(f, 'format_id'),
2972                     format_field(f, 'ext'),
2973                     self.format_resolution(f),
2974                     self._format_note(f)]
2975                 for f in formats
2976                 if f.get('preference') is None or f['preference'] >= -1000]
2977             header_line = ['format code', 'extension', 'resolution', 'note']
2978
2979         self.to_screen(
2980             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2981                 header_line,
2982                 table,
2983                 delim=new_format,
2984                 extraGap=(0 if new_format else 1),
2985                 hideEmpty=new_format)))
2986
2987     def list_thumbnails(self, info_dict):
2988         thumbnails = info_dict.get('thumbnails')
2989         if not thumbnails:
2990             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2991             return
2992
2993         self.to_screen(
2994             '[info] Thumbnails for %s:' % info_dict['id'])
2995         self.to_screen(render_table(
2996             ['ID', 'width', 'height', 'URL'],
2997             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2998
2999     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3000         if not subtitles:
3001             self.to_screen('%s has no %s' % (video_id, name))
3002             return
3003         self.to_screen(
3004             'Available %s for %s:' % (name, video_id))
3005
3006         def _row(lang, formats):
3007             exts, names = zip(*((f['ext'], f.get('name', 'unknown')) for f in reversed(formats)))
3008             if len(set(names)) == 1:
3009                 names = [] if names[0] == 'unknown' else names[:1]
3010             return [lang, ', '.join(names), ', '.join(exts)]
3011
3012         self.to_screen(render_table(
3013             ['Language', 'Name', 'Formats'],
3014             [_row(lang, formats) for lang, formats in subtitles.items()],
3015             hideEmpty=True))
3016
3017     def urlopen(self, req):
3018         """ Start an HTTP download """
3019         if isinstance(req, compat_basestring):
3020             req = sanitized_Request(req)
3021         return self._opener.open(req, timeout=self._socket_timeout)
3022
    def print_debug_header(self):
        """Write the '[debug]' header lines describing the runtime environment.

        Does nothing unless the 'verbose' param is set. Otherwise it reports,
        in order: the encodings in use, the program version and how it is
        being run, lazy loading / plugin extractors / compatibility options
        when applicable, the git HEAD (best effort), the Python version and
        platform, the versions of the external executables, the proxy map
        and, with the 'call_home' param, the public IP address.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may lack an 'encoding' attribute; report its type instead
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        # Tag describing how the program is being run: frozen executable,
        # zip archive, plain source checkout, or nothing when undetermined
        source = (
            '(exe)' if hasattr(sys, 'frozen')
            else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
            else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
            else '')
        self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        if self.params.get('compat_opts'):
            self._write_string(
                '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
        # Best effort: ask git for the current commit of the directory holding
        # this file. Any failure (git missing, not a checkout, ...) is ignored.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            # Only print output that starts like a hexadecimal commit hash
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            # sys.exc_clear only exists on Python 2; its absence is ignored too
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Implementation name, with the PyPy version appended when available
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        # Versions of the external executables; entries with a false value
        # are left out of the printed list
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Merge the proxies of every opener handler that declares some
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            # NOTE(review): this unconditional return makes the version check
            # below unreachable - presumably disabled on purpose; confirm
            # before removing either the return or the dead code
            return
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3112
3113     def _setup_opener(self):
3114         timeout_val = self.params.get('socket_timeout')
3115         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3116
3117         opts_cookiefile = self.params.get('cookiefile')
3118         opts_proxy = self.params.get('proxy')
3119
3120         if opts_cookiefile is None:
3121             self.cookiejar = compat_cookiejar.CookieJar()
3122         else:
3123             opts_cookiefile = expand_path(opts_cookiefile)
3124             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
3125             if os.access(opts_cookiefile, os.R_OK):
3126                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
3127
3128         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3129         if opts_proxy is not None:
3130             if opts_proxy == '':
3131                 proxies = {}
3132             else:
3133                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3134         else:
3135             proxies = compat_urllib_request.getproxies()
3136             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3137             if 'http' in proxies and 'https' not in proxies:
3138                 proxies['https'] = proxies['http']
3139         proxy_handler = PerRequestProxyHandler(proxies)
3140
3141         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3142         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3143         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3144         redirect_handler = YoutubeDLRedirectHandler()
3145         data_handler = compat_urllib_request_DataHandler()
3146
3147         # When passing our own FileHandler instance, build_opener won't add the
3148         # default FileHandler and allows us to disable the file protocol, which
3149         # can be used for malicious purposes (see
3150         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3151         file_handler = compat_urllib_request.FileHandler()
3152
3153         def file_open(*args, **kwargs):
3154             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3155         file_handler.file_open = file_open
3156
3157         opener = compat_urllib_request.build_opener(
3158             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3159
3160         # Delete the default user-agent header, which would otherwise apply in
3161         # cases where our custom HTTP handler doesn't come into play
3162         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3163         opener.addheaders = []
3164         self._opener = opener
3165
3166     def encode(self, s):
3167         if isinstance(s, bytes):
3168             return s  # Already encoded
3169
3170         try:
3171             return s.encode(self.get_encoding())
3172         except UnicodeEncodeError as err:
3173             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3174             raise
3175
3176     def get_encoding(self):
3177         encoding = self.params.get('encoding')
3178         if encoding is None:
3179             encoding = preferredencoding()
3180         return encoding
3181
3182     def _write_thumbnails(self, info_dict, filename):  # return the extensions
3183         write_all = self.params.get('write_all_thumbnails', False)
3184         thumbnails = []
3185         if write_all or self.params.get('writethumbnail', False):
3186             thumbnails = info_dict.get('thumbnails') or []
3187         multiple = write_all and len(thumbnails) > 1
3188
3189         ret = []
3190         for t in thumbnails[::1 if write_all else -1]:
3191             thumb_ext = determine_ext(t['url'], 'jpg')
3192             suffix = '%s.' % t['id'] if multiple else ''
3193             thumb_display_id = '%s ' % t['id'] if multiple else ''
3194             t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3195
3196             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3197                 ret.append(suffix + thumb_ext)
3198                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3199                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3200             else:
3201                 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3202                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3203                 try:
3204                     uf = self.urlopen(t['url'])
3205                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3206                         shutil.copyfileobj(uf, thumbf)
3207                     ret.append(suffix + thumb_ext)
3208                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3209                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3210                 except network_exceptions as err:
3211                     self.report_warning('Unable to download thumbnail "%s": %s' %
3212                                         (t['url'], error_to_compat_str(err)))
3213             if ret and not write_all:
3214                 break
3215         return ret