yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import time
  24 import tokenize
  25 import traceback
  26 import random
  27
  28 from string import ascii_letters
  29 from zipimport import zipimporter
  30
  31 from .compat import (
  32     compat_basestring,
  33     compat_cookiejar,
  34     compat_get_terminal_size,
  35     compat_kwargs,
  36     compat_numeric_types,
  37     compat_os_name,
  38     compat_str,
  39     compat_tokenize_tokenize,
  40     compat_urllib_error,
  41     compat_urllib_request,
  42     compat_urllib_request_DataHandler,
  43 )
  44 from .utils import (
  45     age_restricted,
  46     args_to_str,
  47     ContentTooShortError,
  48     date_from_str,
  49     DateRange,
  50     DEFAULT_OUTTMPL,
  51     determine_ext,
  52     determine_protocol,
  53     DOT_DESKTOP_LINK_TEMPLATE,
  54     DOT_URL_LINK_TEMPLATE,
  55     DOT_WEBLOC_LINK_TEMPLATE,
  56     DownloadError,
  57     encode_compat_str,
  58     encodeFilename,
  59     EntryNotInPlaylist,
  60     error_to_compat_str,
  61     ExistingVideoReached,
  62     expand_path,
  63     ExtractorError,
  64     float_or_none,
  65     format_bytes,
  66     format_field,
  67     STR_FORMAT_RE,
  68     formatSeconds,
  69     GeoRestrictedError,
  70     int_or_none,
  71     iri_to_uri,
  72     ISO3166Utils,
  73     LazyList,
  74     locked_file,
  75     make_dir,
  76     make_HTTPS_handler,
  77     MaxDownloadsReached,
  78     network_exceptions,
  79     orderedSet,
  80     OUTTMPL_TYPES,
  81     PagedList,
  82     parse_filesize,
  83     PerRequestProxyHandler,
  84     platform_name,
  85     PostProcessingError,
  86     preferredencoding,
  87     prepend_extension,
  88     process_communicate_or_kill,
  89     random_uuidv4,
  90     register_socks_protocols,
  91     RejectedVideoReached,
  92     render_table,
  93     replace_extension,
  94     SameFileError,
  95     sanitize_filename,
  96     sanitize_path,
  97     sanitize_url,
  98     sanitized_Request,
  99     std_headers,
 100     str_or_none,
 101     strftime_or_none,
 102     subtitles_filename,
 103     to_high_limit_path,
 104     traverse_obj,
 105     UnavailableVideoError,
 106     url_basename,
 107     version_tuple,
 108     write_json_file,
 109     write_string,
 110     YoutubeDLCookieJar,
 111     YoutubeDLCookieProcessor,
 112     YoutubeDLHandler,
 113     YoutubeDLRedirectHandler,
 114 )
 115 from .cache import Cache
 116 from .extractor import (
 117     gen_extractor_classes,
 118     get_info_extractor,
 119     _LAZY_LOADER,
 120     _PLUGIN_CLASSES
 121 )
 122 from .extractor.openload import PhantomJSwrapper
 123 from .downloader import (
 124     get_suitable_downloader,
 125     shorten_protocol_name
 126 )
 127 from .downloader.rtmp import rtmpdump_version
 128 from .postprocessor import (
 129     FFmpegFixupM3u8PP,
 130     FFmpegFixupM4aPP,
 131     FFmpegFixupStretchedPP,
 132     FFmpegMergerPP,
 133     FFmpegPostProcessor,
 134     # FFmpegSubtitlesConvertorPP,
 135     get_postprocessor,
 136     MoveFilesAfterDownloadPP,
 137 )
 138 from .version import __version__
 139
 140 if compat_os_name == 'nt':
 141     import ctypes
 142
 143
 144 class YoutubeDL(object):
 145     """YoutubeDL class.
 146
  147     YoutubeDL objects are the ones responsible for downloading the
 148     actual video file and writing it to disk if the user has requested
 149     it, among some other tasks. In most cases there should be one per
 150     program. As, given a video URL, the downloader doesn't know how to
 151     extract all the needed information, task that InfoExtractors do, it
 152     has to pass the URL to one of them.
 153
 154     For this, YoutubeDL objects have a method that allows
 155     InfoExtractors to be registered in a given order. When it is passed
  156     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 157     finds that reports being able to handle it. The InfoExtractor extracts
 158     all the information about the video or videos the URL refers to, and
 159     YoutubeDL process the extracted information, possibly using a File
 160     Downloader to download the video.
 161
 162     YoutubeDL objects accept a lot of parameters. In order not to saturate
 163     the object constructor with arguments, it receives a dictionary of
 164     options instead. These options are available through the params
 165     attribute for the InfoExtractors to use. The YoutubeDL also
 166     registers itself as the downloader in charge for the InfoExtractors
 167     that are added to it, so this is a "mutual registration".
 168
 169     Available options:
 170
 171     username:          Username for authentication purposes.
 172     password:          Password for authentication purposes.
 173     videopassword:     Password for accessing a video.
 174     ap_mso:            Adobe Pass multiple-system operator identifier.
 175     ap_username:       Multiple-system operator account username.
 176     ap_password:       Multiple-system operator account password.
 177     usenetrc:          Use netrc for authentication instead.
 178     verbose:           Print additional info to stdout.
 179     quiet:             Do not print messages to stdout.
 180     no_warnings:       Do not print out anything for warnings.
 181     forceprint:        A list of templates to force print
 182     forceurl:          Force printing final URL. (Deprecated)
 183     forcetitle:        Force printing title. (Deprecated)
 184     forceid:           Force printing ID. (Deprecated)
 185     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 186     forcedescription:  Force printing description. (Deprecated)
 187     forcefilename:     Force printing final filename. (Deprecated)
 188     forceduration:     Force printing duration. (Deprecated)
 189     forcejson:         Force printing info_dict as JSON.
 190     dump_single_json:  Force printing the info_dict of the whole playlist
 191                        (or video) as a single JSON line.
 192     force_write_download_archive: Force writing download archive regardless
 193                        of 'skip_download' or 'simulate'.
 194     simulate:          Do not download the video files.
 195     format:            Video format code. see "FORMAT SELECTION" for more details.
 196     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
  197     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 198                        extracting metadata even if the video is not actually
 199                        available for download (experimental)
 200     format_sort:       How to sort the video formats. see "Sorting Formats"
 201                        for more details.
 202     format_sort_force: Force the given format_sort. see "Sorting Formats"
 203                        for more details.
 204     allow_multiple_video_streams:   Allow multiple video streams to be merged
 205                        into a single file
 206     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 207                        into a single file
 208     paths:             Dictionary of output paths. The allowed keys are 'home'
 209                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 210     outtmpl:           Dictionary of templates for output names. Allowed keys
 211                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
  212                        A string is also accepted for backward compatibility
 213     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 214     restrictfilenames: Do not allow "&" and spaces in file names
 215     trim_file_name:    Limit length of filename (extension excluded)
 216     windowsfilenames:  Force the filenames to be windows compatible
 217     ignoreerrors:      Do not stop on download errors
 218                        (Default True when running yt-dlp,
 219                        but False when directly accessing YoutubeDL class)
 220     skip_playlist_after_errors: Number of allowed failures until the rest of
 221                        the playlist is skipped
 222     force_generic_extractor: Force downloader to use the generic extractor
 223     overwrites:        Overwrite all video and metadata files if True,
 224                        overwrite only non-video files if None
 225                        and don't overwrite any file if False
 226     playliststart:     Playlist item to start at.
 227     playlistend:       Playlist item to end at.
 228     playlist_items:    Specific indices of playlist to download.
 229     playlistreverse:   Download playlist items in reverse order.
 230     playlistrandom:    Download playlist items in random order.
 231     matchtitle:        Download only matching titles.
 232     rejecttitle:       Reject downloads for matching titles.
 233     logger:            Log messages to a logging.Logger instance.
 234     logtostderr:       Log messages to stderr instead of stdout.
 235     writedescription:  Write the video description to a .description file
  236     writeinfojson:     Write the video metadata to a .info.json file
 237     clean_infojson:    Remove private fields from the infojson
 238     writecomments:     Extract video comments. This will not be written to disk
 239                        unless writeinfojson is also given
 240     writeannotations:  Write the video annotations to a .annotations.xml file
 241     writethumbnail:    Write the thumbnail image to a file
 242     allow_playlist_files: Whether to write playlists' description, infojson etc
 243                        also to disk when using the 'write*' options
 244     write_all_thumbnails:  Write all thumbnail formats to files
 245     writelink:         Write an internet shortcut file, depending on the
 246                        current platform (.url/.webloc/.desktop)
 247     writeurllink:      Write a Windows internet shortcut file (.url)
 248     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 249     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 250     writesubtitles:    Write the video subtitles to a file
 251     writeautomaticsub: Write the automatically generated subtitles to a file
 252     allsubtitles:      Deprecated - Use subtitlelangs = ['all']
 253                        Downloads all the subtitles of the video
 254                        (requires writesubtitles or writeautomaticsub)
 255     listsubtitles:     Lists all available subtitles for the video
 256     subtitlesformat:   The format code for subtitles
 257     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 258                        The list may contain "all" to refer to all the available
 259                        subtitles. The language can be prefixed with a "-" to
 260                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 261     keepvideo:         Keep the video file after post-processing
 262     daterange:         A DateRange object, download only if the upload_date is in the range.
 263     skip_download:     Skip the actual download of the video file
 264     cachedir:          Location of the cache files in the filesystem.
 265                        False to disable filesystem cache.
 266     noplaylist:        Download single video instead of a playlist if in doubt.
 267     age_limit:         An integer representing the user's age in years.
 268                        Unsuitable videos for the given age are skipped.
 269     min_views:         An integer representing the minimum view count the video
 270                        must have in order to not be skipped.
 271                        Videos without view count information are always
 272                        downloaded. None for no limit.
 273     max_views:         An integer representing the maximum view count.
 274                        Videos that are more popular than that are not
 275                        downloaded.
 276                        Videos without view count information are always
 277                        downloaded. None for no limit.
 278     download_archive:  File name of a file where all downloads are recorded.
 279                        Videos already present in the file are not downloaded
 280                        again.
 281     break_on_existing: Stop the download process after attempting to download a
 282                        file that is in the archive.
 283     break_on_reject:   Stop the download process when encountering a video that
 284                        has been filtered out.
 285     cookiefile:        File name where cookies should be read from and dumped to
 286     nocheckcertificate:Do not verify SSL certificates
 287     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 288                        At the moment, this is only supported by YouTube.
 289     proxy:             URL of the proxy server to use
 290     geo_verification_proxy:  URL of the proxy to use for IP address verification
 291                        on geo-restricted sites.
 292     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 293     bidi_workaround:   Work around buggy terminals without bidirectional text
  294                        support, using fribidi
 295     debug_printtraffic:Print out sent and received HTTP traffic
 296     include_ads:       Download ads as well
 297     default_search:    Prepend this string if an input url is not valid.
 298                        'auto' for elaborate guessing
 299     encoding:          Use this encoding instead of the system-specified.
 300     extract_flat:      Do not resolve URLs, return the immediate result.
 301                        Pass in 'in_playlist' to only show this behavior for
 302                        playlist items.
 303     postprocessors:    A list of dictionaries, each with an entry
 304                        * key:  The name of the postprocessor. See
 305                                yt_dlp/postprocessor/__init__.py for a list.
 306                        * when: When to run the postprocessor. Can be one of
 307                                pre_process|before_dl|post_process|after_move.
 308                                Assumed to be 'post_process' if not given
 309     post_hooks:        A list of functions that get called as the final step
 310                        for each video file, after all postprocessors have been
 311                        called. The filename will be passed as the only argument.
 312     progress_hooks:    A list of functions that get called on download
 313                        progress, with a dictionary with the entries
 314                        * status: One of "downloading", "error", or "finished".
 315                                  Check this first and ignore unknown values.
 316
 317                        If status is one of "downloading", or "finished", the
 318                        following properties may also be present:
 319                        * filename: The final filename (always present)
 320                        * tmpfilename: The filename we're currently writing to
 321                        * downloaded_bytes: Bytes on disk
 322                        * total_bytes: Size of the whole file, None if unknown
 323                        * total_bytes_estimate: Guess of the eventual file size,
 324                                                None if unavailable.
 325                        * elapsed: The number of seconds since download started.
 326                        * eta: The estimated time in seconds, None if unknown
 327                        * speed: The download speed in bytes/second, None if
 328                                 unknown
 329                        * fragment_index: The counter of the currently
 330                                          downloaded video fragment.
 331                        * fragment_count: The number of fragments (= individual
 332                                          files that will be merged)
 333
 334                        Progress hooks are guaranteed to be called at least once
 335                        (with status "finished") if the download is successful.
 336     merge_output_format: Extension to use when merging formats.
 337     final_ext:         Expected final extension; used to detect when the file was
 338                        already downloaded and converted. "merge_output_format" is
 339                        replaced by this extension when given
 340     fixup:             Automatically correct known faults of the file.
 341                        One of:
 342                        - "never": do nothing
 343                        - "warn": only emit a warning
 344                        - "detect_or_warn": check whether we can do anything
 345                                            about it, warn otherwise (default)
 346     source_address:    Client-side IP address to bind to.
 347     call_home:         Boolean, true iff we are allowed to contact the
 348                        yt-dlp servers for debugging. (BROKEN)
 349     sleep_interval_requests: Number of seconds to sleep between requests
 350                        during extraction
 351     sleep_interval:    Number of seconds to sleep before each download when
 352                        used alone or a lower bound of a range for randomized
 353                        sleep before each download (minimum possible number
 354                        of seconds to sleep) when used along with
 355                        max_sleep_interval.
 356     max_sleep_interval:Upper bound of a range for randomized sleep before each
 357                        download (maximum possible number of seconds to sleep).
 358                        Must only be used along with sleep_interval.
 359                        Actual sleep time will be a random float from range
 360                        [sleep_interval; max_sleep_interval].
 361     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 362     listformats:       Print an overview of available video formats and exit.
 363     list_thumbnails:   Print a table of all thumbnails and exit.
 364     match_filter:      A function that gets called with the info_dict of
 365                        every video.
 366                        If it returns a message, the video is ignored.
 367                        If it returns None, the video is downloaded.
 368                        match_filter_func in utils.py is one example for this.
 369     no_color:          Do not emit color codes in output.
 370     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 371                        HTTP header
 372     geo_bypass_country:
 373                        Two-letter ISO 3166-2 country code that will be used for
 374                        explicit geographic restriction bypassing via faking
 375                        X-Forwarded-For HTTP header
 376     geo_bypass_ip_block:
 377                        IP range in CIDR notation that will be used similarly to
 378                        geo_bypass_country
 379
 380     The following options determine which downloader is picked:
 381     external_downloader: A dictionary of protocol keys and the executable of the
 382                        external downloader to use for it. The allowed protocols
 383                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 384                        Set the value to 'native' to use the native downloader
 385     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 386                        or {'m3u8': 'ffmpeg'} instead.
 387                        Use the native HLS downloader instead of ffmpeg/avconv
 388                        if True, otherwise use ffmpeg/avconv if False, otherwise
 389                        use downloader suggested by extractor if None.
 390     compat_opts:       Compatibility options. See "Differences in default behavior".
 391                        Note that only format-sort, format-spec, no-live-chat,
 392                        no-attach-info-json, playlist-index, list-formats,
 393                        no-direct-merge, no-youtube-channel-redirect,
 394                        and no-youtube-unavailable-videos works when used via the API
 395
 396     The following parameters are not used by YoutubeDL itself, they are used by
 397     the downloader (see yt_dlp/downloader/common.py):
 398     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 399     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 400     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 401     http_chunk_size.
 402
 403     The following options are used by the post processors:
 404     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 405                        otherwise prefer ffmpeg. (avconv support is deprecated)
 406     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 407                        to the binary or its containing directory.
 408     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 409                         and a list of additional command-line arguments for the
 410                         postprocessor/executable. The dict can also have "PP+EXE" keys
 411                         which are used when the given exe is used by the given PP.
  412                         Use 'default' as the name for arguments to be passed to all PP
 413
 414     The following options are used by the extractors:
 415     extractor_retries: Number of times to retry for known errors
 416     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 417     hls_split_discontinuity: Split HLS playlists to different formats at
 418                        discontinuities such as ad breaks (default: False)
 419     youtube_include_dash_manifest: If True (default), DASH manifests and related
 420                        data will be downloaded and processed by extractor.
 421                        You can reduce network I/O by disabling it if you don't
 422                        care about DASH. (only for youtube)
 423     youtube_include_hls_manifest: If True (default), HLS manifests and related
 424                        data will be downloaded and processed by extractor.
 425                        You can reduce network I/O by disabling it if you don't
 426                        care about HLS. (only for youtube)
 427     """
 428
    # Names of the fields that are treated as numeric.
    # NOTE(review): presumably consulted when evaluating output templates - confirm
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults for instance state.
    # NOTE(review): the list/dict/set objects below are created once, when the
    # class body runs, and are shared by every instance that does not rebind
    # the attribute itself
    params = None
    _ies = []
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    __prepare_filename_warned = False
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
 450
 451     def __init__(self, params=None, auto_init=True):
 452         """Create a FileDownloader object with the given options."""
 453         if params is None:
 454             params = {}
 455         self._ies = []
 456         self._ies_instances = {}
 457         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 458         self.__prepare_filename_warned = False
 459         self._first_webpage_request = True
 460         self._post_hooks = []
 461         self._progress_hooks = []
 462         self._download_retcode = 0
 463         self._num_downloads = 0
 464         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 465         self._err_file = sys.stderr
 466         self.params = {
 467             # Default parameters
 468             'nocheckcertificate': False,
 469         }
 470         self.params.update(params)
 471         self.cache = Cache(self)
 472
 473         if sys.version_info < (3, 6):
 474             self.report_warning(
 475                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 476
 477         def check_deprecated(param, option, suggestion):
 478             if self.params.get(param) is not None:
 479                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 480                 return True
 481             return False
 482
 483         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 484             if self.params.get('geo_verification_proxy') is None:
 485                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 486
 487         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 488         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 489         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 490
 491         for msg in self.params.get('warnings', []):
 492             self.report_warning(msg)
 493
 494         if self.params.get('final_ext'):
 495             if self.params.get('merge_output_format'):
 496                 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
 497             self.params['merge_output_format'] = self.params['final_ext']
 498
 499         if 'overwrites' in self.params and self.params['overwrites'] is None:
 500             del self.params['overwrites']
 501
 502         if params.get('bidi_workaround', False):
 503             try:
 504                 import pty
 505                 master, slave = pty.openpty()
 506                 width = compat_get_terminal_size().columns
 507                 if width is None:
 508                     width_args = []
 509                 else:
 510                     width_args = ['-w', str(width)]
 511                 sp_kwargs = dict(
 512                     stdin=subprocess.PIPE,
 513                     stdout=slave,
 514                     stderr=self._err_file)
 515                 try:
 516                     self._output_process = subprocess.Popen(
 517                         ['bidiv'] + width_args, **sp_kwargs
 518                     )
 519                 except OSError:
 520                     self._output_process = subprocess.Popen(
 521                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 522                 self._output_channel = os.fdopen(master, 'rb')
 523             except OSError as ose:
 524                 if ose.errno == errno.ENOENT:
 525                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 526                 else:
 527                     raise
 528
 529         if (sys.platform != 'win32'
 530                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 531                 and not params.get('restrictfilenames', False)):
 532             # Unicode filesystem API will throw errors (#1474, #13027)
 533             self.report_warning(
 534                 'Assuming --restrict-filenames since file system encoding '
 535                 'cannot encode all characters. '
 536                 'Set the LC_ALL environment variable to fix this.')
 537             self.params['restrictfilenames'] = True
 538
 539         self.outtmpl_dict = self.parse_outtmpl()
 540
 541         self._setup_opener()
 542
 543         """Preload the archive, if any is specified"""
 544         def preload_download_archive(fn):
 545             if fn is None:
 546                 return False
 547             self.write_debug('Loading archive file %r\n' % fn)
 548             try:
 549                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 550                     for line in archive_file:
 551                         self.archive.add(line.strip())
 552             except IOError as ioe:
 553                 if ioe.errno != errno.ENOENT:
 554                     raise
 555                 return False
 556             return True
 557
 558         self.archive = set()
 559         preload_download_archive(self.params.get('download_archive'))
 560
 561         if auto_init:
 562             self.print_debug_header()
 563             self.add_default_info_extractors()
 564
 565         for pp_def_raw in self.params.get('postprocessors', []):
 566             pp_class = get_postprocessor(pp_def_raw['key'])
 567             pp_def = dict(pp_def_raw)
 568             del pp_def['key']
 569             if 'when' in pp_def:
 570                 when = pp_def['when']
 571                 del pp_def['when']
 572             else:
 573                 when = 'post_process'
 574             pp = pp_class(self, **compat_kwargs(pp_def))
 575             self.add_post_processor(pp, when=when)
 576
 577         for ph in self.params.get('post_hooks', []):
 578             self.add_post_hook(ph)
 579
 580         for ph in self.params.get('progress_hooks', []):
 581             self.add_progress_hook(ph)
 582
 583         register_socks_protocols()
 584
 585     def warn_if_short_id(self, argv):
 586         # short YouTube ID starting with dash?
 587         idxs = [
 588             i for i, a in enumerate(argv)
 589             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 590         if idxs:
 591             correct_argv = (
 592                 ['yt-dlp']
 593                 + [a for i, a in enumerate(argv) if i not in idxs]
 594                 + ['--'] + [argv[i] for i in idxs]
 595             )
 596             self.report_warning(
 597                 'Long argument string detected. '
 598                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 599                 args_to_str(correct_argv))
 600
 601     def add_info_extractor(self, ie):
 602         """Add an InfoExtractor object to the end of the list."""
 603         self._ies.append(ie)
 604         if not isinstance(ie, type):
 605             self._ies_instances[ie.ie_key()] = ie
 606             ie.set_downloader(self)
 607
 608     def get_info_extractor(self, ie_key):
 609         """
 610         Get an instance of an IE with name ie_key, it will try to get one from
 611         the _ies list, if there's no instance it will create a new one and add
 612         it to the extractor list.
 613         """
 614         ie = self._ies_instances.get(ie_key)
 615         if ie is None:
 616             ie = get_info_extractor(ie_key)()
 617             self.add_info_extractor(ie)
 618         return ie
 619
 620     def add_default_info_extractors(self):
 621         """
 622         Add the InfoExtractors returned by gen_extractors to the end of the list
 623         """
 624         for ie in gen_extractor_classes():
 625             self.add_info_extractor(ie)
 626
 627     def add_post_processor(self, pp, when='post_process'):
 628         """Add a PostProcessor object to the end of the chain."""
 629         self._pps[when].append(pp)
 630         pp.set_downloader(self)
 631
 632     def add_post_hook(self, ph):
 633         """Add the post hook"""
 634         self._post_hooks.append(ph)
 635
 636     def add_progress_hook(self, ph):
 637         """Add the progress hook (currently only for the file downloader)"""
 638         self._progress_hooks.append(ph)
 639
 640     def _bidi_workaround(self, message):
 641         if not hasattr(self, '_output_channel'):
 642             return message
 643
 644         assert hasattr(self, '_output_process')
 645         assert isinstance(message, compat_str)
 646         line_count = message.count('\n') + 1
 647         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 648         self._output_process.stdin.flush()
 649         res = ''.join(self._output_channel.readline().decode('utf-8')
 650                       for _ in range(line_count))
 651         return res[:-len('\n')]
 652
 653     def _write_string(self, s, out=None):
 654         write_string(s, out=out, encoding=self.params.get('encoding'))
 655
 656     def to_stdout(self, message, skip_eol=False, quiet=False):
 657         """Print message to stdout"""
 658         if self.params.get('logger'):
 659             self.params['logger'].debug(message)
 660         elif not quiet or self.params.get('verbose'):
 661             self._write_string(
 662                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 663                 self._err_file if quiet else self._screen_file)
 664
 665     def to_stderr(self, message):
 666         """Print message to stderr"""
 667         assert isinstance(message, compat_str)
 668         if self.params.get('logger'):
 669             self.params['logger'].error(message)
 670         else:
 671             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
 672
 673     def to_console_title(self, message):
 674         if not self.params.get('consoletitle', False):
 675             return
 676         if compat_os_name == 'nt':
 677             if ctypes.windll.kernel32.GetConsoleWindow():
 678                 # c_wchar_p() might not be necessary if `message` is
 679                 # already of type unicode()
 680                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 681         elif 'TERM' in os.environ:
 682             self._write_string('\033]0;%s\007' % message, self._screen_file)
 683
 684     def save_console_title(self):
 685         if not self.params.get('consoletitle', False):
 686             return
 687         if self.params.get('simulate', False):
 688             return
 689         if compat_os_name != 'nt' and 'TERM' in os.environ:
 690             # Save the title on stack
 691             self._write_string('\033[22;0t', self._screen_file)
 692
 693     def restore_console_title(self):
 694         if not self.params.get('consoletitle', False):
 695             return
 696         if self.params.get('simulate', False):
 697             return
 698         if compat_os_name != 'nt' and 'TERM' in os.environ:
 699             # Restore the title from stack
 700             self._write_string('\033[23;0t', self._screen_file)
 701
 702     def __enter__(self):
 703         self.save_console_title()
 704         return self
 705
 706     def __exit__(self, *args):
 707         self.restore_console_title()
 708
 709         if self.params.get('cookiefile') is not None:
 710             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 711
 712     def trouble(self, message=None, tb=None):
 713         """Determine action to take when a download problem appears.
 714
 715         Depending on if the downloader has been configured to ignore
 716         download errors or not, this method may throw an exception or
 717         not when errors are found, after printing the message.
 718
 719         tb, if given, is additional traceback information.
 720         """
 721         if message is not None:
 722             self.to_stderr(message)
 723         if self.params.get('verbose'):
 724             if tb is None:
 725                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 726                     tb = ''
 727                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 728                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 729                     tb += encode_compat_str(traceback.format_exc())
 730                 else:
 731                     tb_data = traceback.format_list(traceback.extract_stack())
 732                     tb = ''.join(tb_data)
 733             if tb:
 734                 self.to_stderr(tb)
 735         if not self.params.get('ignoreerrors', False):
 736             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 737                 exc_info = sys.exc_info()[1].exc_info
 738             else:
 739                 exc_info = sys.exc_info()
 740             raise DownloadError(message, exc_info)
 741         self._download_retcode = 1
 742
 743     def to_screen(self, message, skip_eol=False):
 744         """Print message to stdout if not in quiet mode"""
 745         self.to_stdout(
 746             message, skip_eol, quiet=self.params.get('quiet', False))
 747
 748     def report_warning(self, message):
 749         '''
 750         Print the message to stderr, it will be prefixed with 'WARNING:'
 751         If stderr is a tty file the 'WARNING:' will be colored
 752         '''
 753         if self.params.get('logger') is not None:
 754             self.params['logger'].warning(message)
 755         else:
 756             if self.params.get('no_warnings'):
 757                 return
 758             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 759                 _msg_header = '\033[0;33mWARNING:\033[0m'
 760             else:
 761                 _msg_header = 'WARNING:'
 762             warning_message = '%s %s' % (_msg_header, message)
 763             self.to_stderr(warning_message)
 764
 765     def report_error(self, message, tb=None):
 766         '''
 767         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 768         in red if stderr is a tty file.
 769         '''
 770         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 771             _msg_header = '\033[0;31mERROR:\033[0m'
 772         else:
 773             _msg_header = 'ERROR:'
 774         error_message = '%s %s' % (_msg_header, message)
 775         self.trouble(error_message, tb)
 776
 777     def write_debug(self, message):
 778         '''Log debug message or Print message to stderr'''
 779         if not self.params.get('verbose', False):
 780             return
 781         message = '[debug] %s' % message
 782         if self.params.get('logger'):
 783             self.params['logger'].debug(message)
 784         else:
 785             self._write_string('%s\n' % message)
 786
 787     def report_file_already_downloaded(self, file_name):
 788         """Report file has already been fully downloaded."""
 789         try:
 790             self.to_screen('[download] %s has already been downloaded' % file_name)
 791         except UnicodeEncodeError:
 792             self.to_screen('[download] The file has already been downloaded')
 793
 794     def report_file_delete(self, file_name):
 795         """Report that existing file will be deleted."""
 796         try:
 797             self.to_screen('Deleting existing file %s' % file_name)
 798         except UnicodeEncodeError:
 799             self.to_screen('Deleting existing file')
 800
 801     def parse_outtmpl(self):
 802         outtmpl_dict = self.params.get('outtmpl', {})
 803         if not isinstance(outtmpl_dict, dict):
 804             outtmpl_dict = {'default': outtmpl_dict}
 805         outtmpl_dict.update({
 806             k: v for k, v in DEFAULT_OUTTMPL.items()
 807             if not outtmpl_dict.get(k)})
 808         for key, val in outtmpl_dict.items():
 809             if isinstance(val, bytes):
 810                 self.report_warning(
 811                     'Parameter outtmpl is bytes, but should be a unicode string. '
 812                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 813         return outtmpl_dict
 814
 815     @staticmethod
 816     def validate_outtmpl(tmpl):
 817         ''' @return None or Exception object '''
 818         try:
 819             re.sub(
 820                 STR_FORMAT_RE.format(''),
 821                 lambda mobj: ('%' if not mobj.group('has_key') else '') + mobj.group(0),
 822                 tmpl
 823             ) % collections.defaultdict(int)
 824             return None
 825         except ValueError as err:
 826             return err
 827
 828     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 829         """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
 830         info_dict = dict(info_dict)
 831         na = self.params.get('outtmpl_na_placeholder', 'NA')
 832
 833         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 834             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
 835             if info_dict.get('duration', None) is not None
 836             else None)
 837         info_dict['epoch'] = int(time.time())
 838         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 839         if info_dict.get('resolution') is None:
 840             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
 841
 842         # For fields playlist_index and autonumber convert all occurrences
 843         # of %(field)s to %(field)0Nd for backward compatibility
 844         field_size_compat_map = {
 845             'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
 846             'autonumber': self.params.get('autonumber_size') or 5,
 847         }
 848
 849         EXTERNAL_FORMAT_RE = STR_FORMAT_RE.format('[^)]*')
 850         # Field is of the form key1.key2...
 851         # where keys (except first) can be string, int or slice
 852         FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
 853         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
 854             (?P<negate>-)?
 855             (?P<fields>{0})
 856             (?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
 857             (?:>(?P<strf_format>.+?))?
 858             (?:\|(?P<default>.*?))?
 859             $'''.format(FIELD_RE))
 860         MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
 861         MATH_FUNCTIONS = {
 862             '+': float.__add__,
 863             '-': float.__sub__,
 864         }
 865         tmpl_dict = {}
 866
 867         get_key = lambda k: traverse_obj(
 868             info_dict, k.split('.'), is_user_input=True, traverse_string=True)
 869
 870         def get_value(mdict):
 871             # Object traversal
 872             value = get_key(mdict['fields'])
 873             # Negative
 874             if mdict['negate']:
 875                 value = float_or_none(value)
 876                 if value is not None:
 877                     value *= -1
 878             # Do maths
 879             if mdict['maths']:
 880                 value = float_or_none(value)
 881                 operator = None
 882                 for item in MATH_OPERATORS_RE.split(mdict['maths'])[1:]:
 883                     if item == '' or value is None:
 884                         return None
 885                     if operator:
 886                         item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
 887                         offset = float_or_none(item)
 888                         if offset is None:
 889                             offset = float_or_none(get_key(item))
 890                         try:
 891                             value = operator(value, multiplier * offset)
 892                         except (TypeError, ZeroDivisionError):
 893                             return None
 894                         operator = None
 895                     else:
 896                         operator = MATH_FUNCTIONS[item]
 897             # Datetime formatting
 898             if mdict['strf_format']:
 899                 value = strftime_or_none(value, mdict['strf_format'])
 900
 901             return value
 902
 903         def create_key(outer_mobj):
 904             if not outer_mobj.group('has_key'):
 905                 return '%{}'.format(outer_mobj.group(0))
 906
 907             key = outer_mobj.group('key')
 908             fmt = outer_mobj.group('format')
 909             mobj = re.match(INTERNAL_FORMAT_RE, key)
 910             if mobj is None:
 911                 value, default = None, na
 912             else:
 913                 mobj = mobj.groupdict()
 914                 default = mobj['default'] if mobj['default'] is not None else na
 915                 value = get_value(mobj)
 916
 917             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
 918                 fmt = '0{:d}d'.format(field_size_compat_map[key])
 919
 920             value = default if value is None else value
 921             key += '\0%s' % fmt
 922
 923             if fmt == 'c':
 924                 value = compat_str(value)
 925                 if value is None:
 926                     value, fmt = default, 's'
 927                 else:
 928                     value = value[0]
 929             elif fmt[-1] not in 'rs':  # numeric
 930                 value = float_or_none(value)
 931                 if value is None:
 932                     value, fmt = default, 's'
 933             if sanitize:
 934                 if fmt[-1] == 'r':
 935                     # If value is an object, sanitize might convert it to a string
 936                     # So we convert it to repr first
 937                     value, fmt = repr(value), '%ss' % fmt[:-1]
 938                 value = sanitize(key, value)
 939             tmpl_dict[key] = value
 940             return '%({key}){fmt}'.format(key=key, fmt=fmt)
 941
 942         return re.sub(EXTERNAL_FORMAT_RE, create_key, outtmpl), tmpl_dict
 943
 944     def _prepare_filename(self, info_dict, tmpl_type='default'):
 945         try:
 946             sanitize = lambda k, v: sanitize_filename(
 947                 compat_str(v),
 948                 restricted=self.params.get('restrictfilenames'),
 949                 is_id=(k == 'id' or k.endswith('_id')))
 950             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
 951             outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
 952
 953             # expand_path translates '%%' into '%' and '$$' into '$'
 954             # correspondingly that is not what we want since we need to keep
 955             # '%%' intact for template dict substitution step. Working around
 956             # with boundary-alike separator hack.
 957             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 958             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 959
 960             # outtmpl should be expand_path'ed before template dict substitution
 961             # because meta fields may contain env variables we don't want to
 962             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 963             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 964             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 965
 966             force_ext = OUTTMPL_TYPES.get(tmpl_type)
 967             if force_ext is not None:
 968                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
 969
 970             # https://github.com/blackjack4494/youtube-dlc/issues/85
 971             trim_file_name = self.params.get('trim_file_name', False)
 972             if trim_file_name:
 973                 fn_groups = filename.rsplit('.')
 974                 ext = fn_groups[-1]
 975                 sub_ext = ''
 976                 if len(fn_groups) > 2:
 977                     sub_ext = fn_groups[-2]
 978                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 979
 980             return filename
 981         except ValueError as err:
 982             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 983             return None
 984
 985     def prepare_filename(self, info_dict, dir_type='', warn=False):
 986         """Generate the output filename."""
 987         paths = self.params.get('paths', {})
 988         assert isinstance(paths, dict)
 989         filename = self._prepare_filename(info_dict, dir_type or 'default')
 990
 991         if warn and not self.__prepare_filename_warned:
 992             if not paths:
 993                 pass
 994             elif filename == '-':
 995                 self.report_warning('--paths is ignored when an outputting to stdout')
 996             elif os.path.isabs(filename):
 997                 self.report_warning('--paths is ignored since an absolute path is given in output template')
 998             self.__prepare_filename_warned = True
 999         if filename == '-' or not filename:
1000             return filename
1001
1002         homepath = expand_path(paths.get('home', '').strip())
1003         assert isinstance(homepath, compat_str)
1004         subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
1005         assert isinstance(subdir, compat_str)
1006         path = os.path.join(homepath, subdir, filename)
1007
1008         # Temporary fix for #4787
1009         # 'Treat' all problem characters by passing filename through preferredencoding
1010         # to workaround encoding issues with subprocess on python2 @ Windows
1011         if sys.version_info < (3, 0) and sys.platform == 'win32':
1012             path = encodeFilename(path, True).decode(preferredencoding())
1013         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1014
1015     def _match_entry(self, info_dict, incomplete=False, silent=False):
1016         """ Returns None if the file should be downloaded """
1017
1018         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1019
1020         def check_filter():
1021             if 'title' in info_dict:
1022                 # This can happen when we're just evaluating the playlist
1023                 title = info_dict['title']
1024                 matchtitle = self.params.get('matchtitle', False)
1025                 if matchtitle:
1026                     if not re.search(matchtitle, title, re.IGNORECASE):
1027                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1028                 rejecttitle = self.params.get('rejecttitle', False)
1029                 if rejecttitle:
1030                     if re.search(rejecttitle, title, re.IGNORECASE):
1031                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1032             date = info_dict.get('upload_date')
1033             if date is not None:
1034                 dateRange = self.params.get('daterange', DateRange())
1035                 if date not in dateRange:
1036                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1037             view_count = info_dict.get('view_count')
1038             if view_count is not None:
1039                 min_views = self.params.get('min_views')
1040                 if min_views is not None and view_count < min_views:
1041                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1042                 max_views = self.params.get('max_views')
1043                 if max_views is not None and view_count > max_views:
1044                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1045             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1046                 return 'Skipping "%s" because it is age restricted' % video_title
1047
1048             if not incomplete:
1049                 match_filter = self.params.get('match_filter')
1050                 if match_filter is not None:
1051                     ret = match_filter(info_dict)
1052                     if ret is not None:
1053                         return ret
1054             return None
1055
1056         if self.in_download_archive(info_dict):
1057             reason = '%s has already been recorded in the archive' % video_title
1058             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1059         else:
1060             reason = check_filter()
1061             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1062         if reason is not None:
1063             if not silent:
1064                 self.to_screen('[download] ' + reason)
1065             if self.params.get(break_opt, False):
1066                 raise break_err()
1067         return reason
1068
1069     @staticmethod
1070     def add_extra_info(info_dict, extra_info):
1071         '''Set the keys from extra_info in info dict if they are missing'''
1072         for key, value in extra_info.items():
1073             info_dict.setdefault(key, value)
1074
1075     def extract_info(self, url, download=True, ie_key=None, extra_info={},
1076                      process=True, force_generic_extractor=False):
1077         """
1078         Return a list with a dictionary for each video extracted.
1079
1080         Arguments:
1081         url -- URL to extract
1082
1083         Keyword arguments:
1084         download -- whether to download videos during extraction
1085         ie_key -- extractor key hint
1086         extra_info -- dictionary containing the extra values to add to each result
1087         process -- whether to resolve all unresolved references (URLs, playlist items),
1088             must be True for download to work.
1089         force_generic_extractor -- force using the generic extractor
1090         """
1091
1092         if not ie_key and force_generic_extractor:
1093             ie_key = 'Generic'
1094
1095         if ie_key:
1096             ies = [self.get_info_extractor(ie_key)]
1097         else:
1098             ies = self._ies
1099
1100         for ie in ies:
1101             if not ie.suitable(url):
1102                 continue
1103
1104             ie_key = ie.ie_key()
1105             ie = self.get_info_extractor(ie_key)
1106             if not ie.working():
1107                 self.report_warning('The program functionality for this site has been marked as broken, '
1108                                     'and will probably not work.')
1109
1110             try:
1111                 temp_id = str_or_none(
1112                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1113                     else ie._match_id(url))
1114             except (AssertionError, IndexError, AttributeError):
1115                 temp_id = None
1116             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1117                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1118                                ie_key, temp_id))
1119                 break
1120             return self.__extract_info(url, ie, download, extra_info, process)
1121         else:
1122             self.report_error('no suitable InfoExtractor for URL %s' % url)
1123
1124     def __handle_extraction_exceptions(func):
1125         def wrapper(self, *args, **kwargs):
1126             try:
1127                 return func(self, *args, **kwargs)
1128             except GeoRestrictedError as e:
1129                 msg = e.msg
1130                 if e.countries:
1131                     msg += '\nThis video is available in %s.' % ', '.join(
1132                         map(ISO3166Utils.short2full, e.countries))
1133                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1134                 self.report_error(msg)
1135             except ExtractorError as e:  # An error we somewhat expected
1136                 self.report_error(compat_str(e), e.format_traceback())
1137             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1138                 raise
1139             except Exception as e:
1140                 if self.params.get('ignoreerrors', False):
1141                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1142                 else:
1143                     raise
1144         return wrapper
1145
1146     @__handle_extraction_exceptions
1147     def __extract_info(self, url, ie, download, extra_info, process):
1148         ie_result = ie.extract(url)
1149         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1150             return
1151         if isinstance(ie_result, list):
1152             # Backwards compatibility: old IE result format
1153             ie_result = {
1154                 '_type': 'compat_list',
1155                 'entries': ie_result,
1156             }
1157         self.add_default_extra_info(ie_result, ie, url)
1158         if process:
1159             return self.process_ie_result(ie_result, download, extra_info)
1160         else:
1161             return ie_result
1162
1163     def add_default_extra_info(self, ie_result, ie, url):
1164         self.add_extra_info(ie_result, {
1165             'extractor': ie.IE_NAME,
1166             'webpage_url': url,
1167             'original_url': url,
1168             'webpage_url_basename': url_basename(url),
1169             'extractor_key': ie.ie_key(),
1170         })
1171
1172     def process_ie_result(self, ie_result, download=True, extra_info={}):
1173         """
1174         Take the result of the ie(may be modified) and resolve all unresolved
1175         references (URLs, playlist items).
1176
1177         It will also download the videos if 'download'.
1178         Returns the resolved ie_result.
1179         """
1180         result_type = ie_result.get('_type', 'video')
1181
1182         if result_type in ('url', 'url_transparent'):
1183             ie_result['url'] = sanitize_url(ie_result['url'])
1184             extract_flat = self.params.get('extract_flat', False)
1185             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1186                     or extract_flat is True):
1187                 info_copy = ie_result.copy()
1188                 self.add_extra_info(info_copy, extra_info)
1189                 self.add_default_extra_info(
1190                     info_copy, self.get_info_extractor(ie_result.get('ie_key')), ie_result['url'])
1191                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1192                 return ie_result
1193
1194         if result_type == 'video':
1195             self.add_extra_info(ie_result, extra_info)
1196             ie_result = self.process_video_result(ie_result, download=download)
1197             additional_urls = (ie_result or {}).get('additional_urls')
1198             if additional_urls:
1199                 # TODO: Improve MetadataFromFieldPP to allow setting a list
1200                 if isinstance(additional_urls, compat_str):
1201                     additional_urls = [additional_urls]
1202                 self.to_screen(
1203                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1204                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1205                 ie_result['additional_entries'] = [
1206                     self.extract_info(
1207                         url, download, extra_info,
1208                         force_generic_extractor=self.params.get('force_generic_extractor'))
1209                     for url in additional_urls
1210                 ]
1211             return ie_result
1212         elif result_type == 'url':
1213             # We have to add extra_info to the results because it may be
1214             # contained in a playlist
1215             return self.extract_info(
1216                 ie_result['url'], download,
1217                 ie_key=ie_result.get('ie_key'),
1218                 extra_info=extra_info)
1219         elif result_type == 'url_transparent':
1220             # Use the information from the embedding page
1221             info = self.extract_info(
1222                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1223                 extra_info=extra_info, download=False, process=False)
1224
1225             # extract_info may return None when ignoreerrors is enabled and
1226             # extraction failed with an error, don't crash and return early
1227             # in this case
1228             if not info:
1229                 return info
1230
1231             force_properties = dict(
1232                 (k, v) for k, v in ie_result.items() if v is not None)
1233             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1234                 if f in force_properties:
1235                     del force_properties[f]
1236             new_result = info.copy()
1237             new_result.update(force_properties)
1238
1239             # Extracted info may not be a video result (i.e.
1240             # info.get('_type', 'video') != video) but rather an url or
1241             # url_transparent. In such cases outer metadata (from ie_result)
1242             # should be propagated to inner one (info). For this to happen
1243             # _type of info should be overridden with url_transparent. This
1244             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1245             if new_result.get('_type') == 'url':
1246                 new_result['_type'] = 'url_transparent'
1247
1248             return self.process_ie_result(
1249                 new_result, download=download, extra_info=extra_info)
1250         elif result_type in ('playlist', 'multi_video'):
1251             # Protect from infinite recursion due to recursively nested playlists
1252             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1253             webpage_url = ie_result['webpage_url']
1254             if webpage_url in self._playlist_urls:
1255                 self.to_screen(
1256                     '[download] Skipping already downloaded playlist: %s'
1257                     % ie_result.get('title') or ie_result.get('id'))
1258                 return
1259
1260             self._playlist_level += 1
1261             self._playlist_urls.add(webpage_url)
1262             self._sanitize_thumbnails(ie_result)
1263             try:
1264                 return self.__process_playlist(ie_result, download)
1265             finally:
1266                 self._playlist_level -= 1
1267                 if not self._playlist_level:
1268                     self._playlist_urls.clear()
1269         elif result_type == 'compat_list':
1270             self.report_warning(
1271                 'Extractor %s returned a compat_list result. '
1272                 'It needs to be updated.' % ie_result.get('extractor'))
1273
1274             def _fixup(r):
1275                 self.add_extra_info(
1276                     r,
1277                     {
1278                         'extractor': ie_result['extractor'],
1279                         'webpage_url': ie_result['webpage_url'],
1280                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1281                         'extractor_key': ie_result['extractor_key'],
1282                     }
1283                 )
1284                 return r
1285             ie_result['entries'] = [
1286                 self.process_ie_result(_fixup(r), download, extra_info)
1287                 for r in ie_result['entries']
1288             ]
1289             return ie_result
1290         else:
1291             raise Exception('Invalid result type: %s' % result_type)
1292
1293     def _ensure_dir_exists(self, path):
1294         return make_dir(path, self.report_error)
1295
1296     def __process_playlist(self, ie_result, download):
1297         # We process each entry in the playlist
1298         playlist = ie_result.get('title') or ie_result.get('id')
1299         self.to_screen('[download] Downloading playlist: %s' % playlist)
1300
1301         if 'entries' not in ie_result:
1302             raise EntryNotInPlaylist()
1303         incomplete_entries = bool(ie_result.get('requested_entries'))
1304         if incomplete_entries:
1305             def fill_missing_entries(entries, indexes):
1306                 ret = [None] * max(*indexes)
1307                 for i, entry in zip(indexes, entries):
1308                     ret[i - 1] = entry
1309                 return ret
1310             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1311
1312         playlist_results = []
1313
1314         playliststart = self.params.get('playliststart', 1)
1315         playlistend = self.params.get('playlistend')
1316         # For backwards compatibility, interpret -1 as whole list
1317         if playlistend == -1:
1318             playlistend = None
1319
1320         playlistitems_str = self.params.get('playlist_items')
1321         playlistitems = None
1322         if playlistitems_str is not None:
1323             def iter_playlistitems(format):
1324                 for string_segment in format.split(','):
1325                     if '-' in string_segment:
1326                         start, end = string_segment.split('-')
1327                         for item in range(int(start), int(end) + 1):
1328                             yield int(item)
1329                     else:
1330                         yield int(string_segment)
1331             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1332
1333         ie_entries = ie_result['entries']
1334         msg = (
1335             'Downloading %d videos' if not isinstance(ie_entries, list)
1336             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1337         if not isinstance(ie_entries, (list, PagedList)):
1338             ie_entries = LazyList(ie_entries)
1339
1340         entries = []
1341         for i in playlistitems or itertools.count(playliststart):
1342             if playlistitems is None and playlistend is not None and playlistend < i:
1343                 break
1344             entry = None
1345             try:
1346                 entry = ie_entries[i - 1]
1347                 if entry is None:
1348                     raise EntryNotInPlaylist()
1349             except (IndexError, EntryNotInPlaylist):
1350                 if incomplete_entries:
1351                     raise EntryNotInPlaylist()
1352                 elif not playlistitems:
1353                     break
1354             entries.append(entry)
1355             try:
1356                 if entry is not None:
1357                     self._match_entry(entry, incomplete=True, silent=True)
1358             except (ExistingVideoReached, RejectedVideoReached):
1359                 break
1360         ie_result['entries'] = entries
1361
1362         # Save playlist_index before re-ordering
1363         entries = [
1364             ((playlistitems[i - 1] if playlistitems else i), entry)
1365             for i, entry in enumerate(entries, 1)
1366             if entry is not None]
1367         n_entries = len(entries)
1368
1369         if not playlistitems and (playliststart or playlistend):
1370             playlistitems = list(range(playliststart, playliststart + n_entries))
1371         ie_result['requested_entries'] = playlistitems
1372
1373         if self.params.get('allow_playlist_files', True):
1374             ie_copy = {
1375                 'playlist': playlist,
1376                 'playlist_id': ie_result.get('id'),
1377                 'playlist_title': ie_result.get('title'),
1378                 'playlist_uploader': ie_result.get('uploader'),
1379                 'playlist_uploader_id': ie_result.get('uploader_id'),
1380                 'playlist_index': 0,
1381             }
1382             ie_copy.update(dict(ie_result))
1383
1384             if self.params.get('writeinfojson', False):
1385                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1386                 if not self._ensure_dir_exists(encodeFilename(infofn)):
1387                     return
1388                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1389                     self.to_screen('[info] Playlist metadata is already present')
1390                 else:
1391                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1392                     try:
1393                         write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1394                     except (OSError, IOError):
1395                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1396
1397             # TODO: This should be passed to ThumbnailsConvertor if necessary
1398             self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1399
1400             if self.params.get('writedescription', False):
1401                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1402                 if not self._ensure_dir_exists(encodeFilename(descfn)):
1403                     return
1404                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1405                     self.to_screen('[info] Playlist description is already present')
1406                 elif ie_result.get('description') is None:
1407                     self.report_warning('There\'s no playlist description to write.')
1408                 else:
1409                     try:
1410                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1411                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1412                             descfile.write(ie_result['description'])
1413                     except (OSError, IOError):
1414                         self.report_error('Cannot write playlist description file ' + descfn)
1415                         return
1416
1417         if self.params.get('playlistreverse', False):
1418             entries = entries[::-1]
1419         if self.params.get('playlistrandom', False):
1420             random.shuffle(entries)
1421
1422         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1423
1424         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1425         failures = 0
1426         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1427         for i, entry_tuple in enumerate(entries, 1):
1428             playlist_index, entry = entry_tuple
1429             if 'playlist_index' in self.params.get('compat_options', []):
1430                 playlist_index = playlistitems[i - 1] if playlistitems else i
1431             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1432             # This __x_forwarded_for_ip thing is a bit ugly but requires
1433             # minimal changes
1434             if x_forwarded_for:
1435                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1436             extra = {
1437                 'n_entries': n_entries,
1438                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1439                 'playlist_index': playlist_index,
1440                 'playlist_autonumber': i,
1441                 'playlist': playlist,
1442                 'playlist_id': ie_result.get('id'),
1443                 'playlist_title': ie_result.get('title'),
1444                 'playlist_uploader': ie_result.get('uploader'),
1445                 'playlist_uploader_id': ie_result.get('uploader_id'),
1446                 'extractor': ie_result['extractor'],
1447                 'webpage_url': ie_result['webpage_url'],
1448                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1449                 'extractor_key': ie_result['extractor_key'],
1450             }
1451
1452             if self._match_entry(entry, incomplete=True) is not None:
1453                 continue
1454
1455             entry_result = self.__process_iterable_entry(entry, download, extra)
1456             if not entry_result:
1457                 failures += 1
1458             if failures >= max_failures:
1459                 self.report_error(
1460                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1461                 break
1462             # TODO: skip failed (empty) entries?
1463             playlist_results.append(entry_result)
1464         ie_result['entries'] = playlist_results
1465         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1466         return ie_result
1467
1468     @__handle_extraction_exceptions
1469     def __process_iterable_entry(self, entry, download, extra_info):
1470         return self.process_ie_result(
1471             entry, download=download, extra_info=extra_info)
1472
1473     def _build_format_filter(self, filter_spec):
1474         " Returns a function to filter the formats according to the filter_spec "
1475
1476         OPERATORS = {
1477             '<': operator.lt,
1478             '<=': operator.le,
1479             '>': operator.gt,
1480             '>=': operator.ge,
1481             '=': operator.eq,
1482             '!=': operator.ne,
1483         }
1484         operator_rex = re.compile(r'''(?x)\s*
1485             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1486             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1487             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1488             $
1489             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1490         m = operator_rex.search(filter_spec)
1491         if m:
1492             try:
1493                 comparison_value = int(m.group('value'))
1494             except ValueError:
1495                 comparison_value = parse_filesize(m.group('value'))
1496                 if comparison_value is None:
1497                     comparison_value = parse_filesize(m.group('value') + 'B')
1498                 if comparison_value is None:
1499                     raise ValueError(
1500                         'Invalid value %r in format specification %r' % (
1501                             m.group('value'), filter_spec))
1502             op = OPERATORS[m.group('op')]
1503
1504         if not m:
1505             STR_OPERATORS = {
1506                 '=': operator.eq,
1507                 '^=': lambda attr, value: attr.startswith(value),
1508                 '$=': lambda attr, value: attr.endswith(value),
1509                 '*=': lambda attr, value: value in attr,
1510             }
1511             str_operator_rex = re.compile(r'''(?x)
1512                 \s*(?P<key>[a-zA-Z0-9._-]+)
1513                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1514                 \s*(?P<value>[a-zA-Z0-9._-]+)
1515                 \s*$
1516                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1517             m = str_operator_rex.search(filter_spec)
1518             if m:
1519                 comparison_value = m.group('value')
1520                 str_op = STR_OPERATORS[m.group('op')]
1521                 if m.group('negation'):
1522                     op = lambda attr, value: not str_op(attr, value)
1523                 else:
1524                     op = str_op
1525
1526         if not m:
1527             raise ValueError('Invalid filter specification %r' % filter_spec)
1528
1529         def _filter(f):
1530             actual_value = f.get(m.group('key'))
1531             if actual_value is None:
1532                 return m.group('none_inclusive')
1533             return op(actual_value, comparison_value)
1534         return _filter
1535
1536     def _default_format_spec(self, info_dict, download=True):
1537
1538         def can_merge():
1539             merger = FFmpegMergerPP(self)
1540             return merger.available and merger.can_merge()
1541
1542         prefer_best = (
1543             not self.params.get('simulate', False)
1544             and download
1545             and (
1546                 not can_merge()
1547                 or info_dict.get('is_live', False)
1548                 or self.outtmpl_dict['default'] == '-'))
1549         compat = (
1550             prefer_best
1551             or self.params.get('allow_multiple_audio_streams', False)
1552             or 'format-spec' in self.params.get('compat_opts', []))
1553
1554         return (
1555             'best/bestvideo+bestaudio' if prefer_best
1556             else 'bestvideo*+bestaudio/best' if not compat
1557             else 'bestvideo+bestaudio/best')
1558
1559     def build_format_selector(self, format_spec):
1560         def syntax_error(note, start):
1561             message = (
1562                 'Invalid format specification: '
1563                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1564             return SyntaxError(message)
1565
1566         PICKFIRST = 'PICKFIRST'
1567         MERGE = 'MERGE'
1568         SINGLE = 'SINGLE'
1569         GROUP = 'GROUP'
1570         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1571
1572         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1573                                   'video': self.params.get('allow_multiple_video_streams', False)}
1574
1575         check_formats = self.params.get('check_formats')
1576
1577         def _parse_filter(tokens):
1578             filter_parts = []
1579             for type, string, start, _, _ in tokens:
1580                 if type == tokenize.OP and string == ']':
1581                     return ''.join(filter_parts)
1582                 else:
1583                     filter_parts.append(string)
1584
1585         def _remove_unused_ops(tokens):
1586             # Remove operators that we don't use and join them with the surrounding strings
1587             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1588             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1589             last_string, last_start, last_end, last_line = None, None, None, None
1590             for type, string, start, end, line in tokens:
1591                 if type == tokenize.OP and string == '[':
1592                     if last_string:
1593                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1594                         last_string = None
1595                     yield type, string, start, end, line
1596                     # everything inside brackets will be handled by _parse_filter
1597                     for type, string, start, end, line in tokens:
1598                         yield type, string, start, end, line
1599                         if type == tokenize.OP and string == ']':
1600                             break
1601                 elif type == tokenize.OP and string in ALLOWED_OPS:
1602                     if last_string:
1603                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1604                         last_string = None
1605                     yield type, string, start, end, line
1606                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1607                     if not last_string:
1608                         last_string = string
1609                         last_start = start
1610                         last_end = end
1611                     else:
1612                         last_string += string
1613             if last_string:
1614                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1615
1616         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1617             selectors = []
1618             current_selector = None
1619             for type, string, start, _, _ in tokens:
1620                 # ENCODING is only defined in python 3.x
1621                 if type == getattr(tokenize, 'ENCODING', None):
1622                     continue
1623                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1624                     current_selector = FormatSelector(SINGLE, string, [])
1625                 elif type == tokenize.OP:
1626                     if string == ')':
1627                         if not inside_group:
1628                             # ')' will be handled by the parentheses group
1629                             tokens.restore_last_token()
1630                         break
1631                     elif inside_merge and string in ['/', ',']:
1632                         tokens.restore_last_token()
1633                         break
1634                     elif inside_choice and string == ',':
1635                         tokens.restore_last_token()
1636                         break
1637                     elif string == ',':
1638                         if not current_selector:
1639                             raise syntax_error('"," must follow a format selector', start)
1640                         selectors.append(current_selector)
1641                         current_selector = None
1642                     elif string == '/':
1643                         if not current_selector:
1644                             raise syntax_error('"/" must follow a format selector', start)
1645                         first_choice = current_selector
1646                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1647                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1648                     elif string == '[':
1649                         if not current_selector:
1650                             current_selector = FormatSelector(SINGLE, 'best', [])
1651                         format_filter = _parse_filter(tokens)
1652                         current_selector.filters.append(format_filter)
1653                     elif string == '(':
1654                         if current_selector:
1655                             raise syntax_error('Unexpected "("', start)
1656                         group = _parse_format_selection(tokens, inside_group=True)
1657                         current_selector = FormatSelector(GROUP, group, [])
1658                     elif string == '+':
1659                         if not current_selector:
1660                             raise syntax_error('Unexpected "+"', start)
1661                         selector_1 = current_selector
1662                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1663                         if not selector_2:
1664                             raise syntax_error('Expected a selector', start)
1665                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1666                     else:
1667                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1668                 elif type == tokenize.ENDMARKER:
1669                     break
1670             if current_selector:
1671                 selectors.append(current_selector)
1672             return selectors
1673
1674         def _merge(formats_pair):
1675             format_1, format_2 = formats_pair
1676
1677             formats_info = []
1678             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1679             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1680
1681             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1682                 get_no_more = {"video": False, "audio": False}
1683                 for (i, fmt_info) in enumerate(formats_info):
1684                     for aud_vid in ["audio", "video"]:
1685                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1686                             if get_no_more[aud_vid]:
1687                                 formats_info.pop(i)
1688                             get_no_more[aud_vid] = True
1689
1690             if len(formats_info) == 1:
1691                 return formats_info[0]
1692
1693             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1694             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1695
1696             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1697             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1698
1699             output_ext = self.params.get('merge_output_format')
1700             if not output_ext:
1701                 if the_only_video:
1702                     output_ext = the_only_video['ext']
1703                 elif the_only_audio and not video_fmts:
1704                     output_ext = the_only_audio['ext']
1705                 else:
1706                     output_ext = 'mkv'
1707
1708             new_dict = {
1709                 'requested_formats': formats_info,
1710                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1711                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1712                 'ext': output_ext,
1713             }
1714
1715             if the_only_video:
1716                 new_dict.update({
1717                     'width': the_only_video.get('width'),
1718                     'height': the_only_video.get('height'),
1719                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1720                     'fps': the_only_video.get('fps'),
1721                     'vcodec': the_only_video.get('vcodec'),
1722                     'vbr': the_only_video.get('vbr'),
1723                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1724                 })
1725
1726             if the_only_audio:
1727                 new_dict.update({
1728                     'acodec': the_only_audio.get('acodec'),
1729                     'abr': the_only_audio.get('abr'),
1730                 })
1731
1732             return new_dict
1733
1734         def _check_formats(formats):
1735             for f in formats:
1736                 self.to_screen('[info] Testing format %s' % f['format_id'])
1737                 paths = self.params.get('paths', {})
1738                 temp_file = os.path.join(
1739                     expand_path(paths.get('home', '').strip()),
1740                     expand_path(paths.get('temp', '').strip()),
1741                     'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
1742                 try:
1743                     dl, _ = self.dl(temp_file, f, test=True)
1744                 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
1745                     dl = False
1746                 finally:
1747                     if os.path.exists(temp_file):
1748                         os.remove(temp_file)
1749                 if dl:
1750                     yield f
1751                 else:
1752                     self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1753
1754         def _build_selector_function(selector):
1755             if isinstance(selector, list):  # ,
1756                 fs = [_build_selector_function(s) for s in selector]
1757
1758                 def selector_function(ctx):
1759                     for f in fs:
1760                         for format in f(ctx):
1761                             yield format
1762                 return selector_function
1763
1764             elif selector.type == GROUP:  # ()
1765                 selector_function = _build_selector_function(selector.selector)
1766
1767             elif selector.type == PICKFIRST:  # /
1768                 fs = [_build_selector_function(s) for s in selector.selector]
1769
1770                 def selector_function(ctx):
1771                     for f in fs:
1772                         picked_formats = list(f(ctx))
1773                         if picked_formats:
1774                             return picked_formats
1775                     return []
1776
1777             elif selector.type == SINGLE:  # atom
1778                 format_spec = selector.selector or 'best'
1779
1780                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1781                 if format_spec == 'all':
1782                     def selector_function(ctx):
1783                         formats = list(ctx['formats'])
1784                         if check_formats:
1785                             formats = _check_formats(formats)
1786                         for f in formats:
1787                             yield f
1788                 elif format_spec == 'mergeall':
1789                     def selector_function(ctx):
1790                         formats = list(_check_formats(ctx['formats']))
1791                         if not formats:
1792                             return
1793                         merged_format = formats[-1]
1794                         for f in formats[-2::-1]:
1795                             merged_format = _merge((merged_format, f))
1796                         yield merged_format
1797
1798                 else:
1799                     format_fallback, format_reverse, format_idx = False, True, 1
1800                     mobj = re.match(
1801                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1802                         format_spec)
1803                     if mobj is not None:
1804                         format_idx = int_or_none(mobj.group('n'), default=1)
1805                         format_reverse = mobj.group('bw')[0] == 'b'
1806                         format_type = (mobj.group('type') or [None])[0]
1807                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1808                         format_modified = mobj.group('mod') is not None
1809
1810                         format_fallback = not format_type and not format_modified  # for b, w
1811                         filter_f = (
1812                             (lambda f: f.get('%scodec' % format_type) != 'none')
1813                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1814                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1815                             if format_type  # bv, ba, wv, wa
1816                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1817                             if not format_modified  # b, w
1818                             else None)  # b*, w*
1819                     else:
1820                         filter_f = ((lambda f: f.get('ext') == format_spec)
1821                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1822                                     else (lambda f: f.get('format_id') == format_spec))  # id
1823
1824                     def selector_function(ctx):
1825                         formats = list(ctx['formats'])
1826                         if not formats:
1827                             return
1828                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1829                         if format_fallback and ctx['incomplete_formats'] and not matches:
1830                             # for extractors with incomplete formats (audio only (soundcloud)
1831                             # or video only (imgur)) best/worst will fallback to
1832                             # best/worst {video,audio}-only format
1833                             matches = formats
1834                         if format_reverse:
1835                             matches = matches[::-1]
1836                         if check_formats:
1837                             matches = list(itertools.islice(_check_formats(matches), format_idx))
1838                         n = len(matches)
1839                         if -n <= format_idx - 1 < n:
1840                             yield matches[format_idx - 1]
1841
1842             elif selector.type == MERGE:        # +
1843                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1844
1845                 def selector_function(ctx):
1846                     for pair in itertools.product(
1847                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1848                         yield _merge(pair)
1849
1850             filters = [self._build_format_filter(f) for f in selector.filters]
1851
1852             def final_selector(ctx):
1853                 ctx_copy = copy.deepcopy(ctx)
1854                 for _filter in filters:
1855                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1856                 return selector_function(ctx_copy)
1857             return final_selector
1858
1859         stream = io.BytesIO(format_spec.encode('utf-8'))
1860         try:
1861             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1862         except tokenize.TokenError:
1863             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1864
1865         class TokenIterator(object):
1866             def __init__(self, tokens):
1867                 self.tokens = tokens
1868                 self.counter = 0
1869
1870             def __iter__(self):
1871                 return self
1872
1873             def __next__(self):
1874                 if self.counter >= len(self.tokens):
1875                     raise StopIteration()
1876                 value = self.tokens[self.counter]
1877                 self.counter += 1
1878                 return value
1879
1880             next = __next__
1881
1882             def restore_last_token(self):
1883                 self.counter -= 1
1884
1885         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1886         return _build_selector_function(parsed_selector)
1887
1888     def _calc_headers(self, info_dict):
1889         res = std_headers.copy()
1890
1891         add_headers = info_dict.get('http_headers')
1892         if add_headers:
1893             res.update(add_headers)
1894
1895         cookies = self._calc_cookies(info_dict)
1896         if cookies:
1897             res['Cookie'] = cookies
1898
1899         if 'X-Forwarded-For' not in res:
1900             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1901             if x_forwarded_for_ip:
1902                 res['X-Forwarded-For'] = x_forwarded_for_ip
1903
1904         return res
1905
1906     def _calc_cookies(self, info_dict):
1907         pr = sanitized_Request(info_dict['url'])
1908         self.cookiejar.add_cookie_header(pr)
1909         return pr.get_header('Cookie')
1910
1911     @staticmethod
1912     def _sanitize_thumbnails(info_dict):
1913         thumbnails = info_dict.get('thumbnails')
1914         if thumbnails is None:
1915             thumbnail = info_dict.get('thumbnail')
1916             if thumbnail:
1917                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1918         if thumbnails:
1919             thumbnails.sort(key=lambda t: (
1920                 t.get('preference') if t.get('preference') is not None else -1,
1921                 t.get('width') if t.get('width') is not None else -1,
1922                 t.get('height') if t.get('height') is not None else -1,
1923                 t.get('id') if t.get('id') is not None else '',
1924                 t.get('url')))
1925             for i, t in enumerate(thumbnails):
1926                 t['url'] = sanitize_url(t['url'])
1927                 if t.get('width') and t.get('height'):
1928                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1929                 if t.get('id') is None:
1930                     t['id'] = '%d' % i
1931
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video result, select its formats and process them.

        info_dict is modified in place: field types are coerced, derived
        fields (thumbnail, display_id, dates, chapter/season/episode titles,
        requested_subtitles, per-format defaults and HTTP headers) are
        filled in, and the formats matching the requested format spec are
        selected. When download is true, each selected format is passed to
        process_info.

        Returns the info dict updated with the last selected format, or
        None when one of the 'list_thumbnails', 'listsubtitles' or
        'listformats' params is set (the listing is printed instead).

        Raises ExtractorError when the 'id' or 'title' field is missing,
        and, unless the 'ignore_no_formats_error' param is set, when there
        are no formats or the requested format is not available.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        # Helpers that coerce wrongly-typed extractor fields and warn about it
        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Listing mode: print the thumbnails and stop without selecting formats
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        # Prefer an explicit 'thumbnail'; otherwise use the last entry of the
        # (sorted) 'thumbnails' list
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive YYYYMMDD date fields from the corresponding timestamps
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in a missing 'ext' from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        # Listing mode: print the available subtitles and stop
        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('No video formats found!')
            else:
                self.report_warning('No video formats found!')

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the list of formats sharing it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        # Fill in per-format defaults: 'format', 'ext', 'protocol' and headers
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats and formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        # pre_process returns a pair; only the (possibly replaced) info dict is used here
        info_dict, _ = self.pre_process(info_dict)

        # Listing mode: print the available formats and stop
        if self.params.get('listformats'):
            if not info_dict.get('formats'):
                raise ExtractorError('No video formats found', expected=True)
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True)
            else:
                self.report_warning('Requested format is not available')
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                # Each selected format is processed with its own shallow copy of info_dict
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2170
2171     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2172         """Select the requested subtitles and their format"""
2173         available_subs = {}
2174         if normal_subtitles and self.params.get('writesubtitles'):
2175             available_subs.update(normal_subtitles)
2176         if automatic_captions and self.params.get('writeautomaticsub'):
2177             for lang, cap_info in automatic_captions.items():
2178                 if lang not in available_subs:
2179                     available_subs[lang] = cap_info
2180
2181         if (not self.params.get('writesubtitles') and not
2182                 self.params.get('writeautomaticsub') or not
2183                 available_subs):
2184             return None
2185
2186         all_sub_langs = available_subs.keys()
2187         if self.params.get('allsubtitles', False):
2188             requested_langs = all_sub_langs
2189         elif self.params.get('subtitleslangs', False):
2190             requested_langs = set()
2191             for lang in self.params.get('subtitleslangs'):
2192                 if lang == 'all':
2193                     requested_langs.update(all_sub_langs)
2194                     continue
2195                 discard = lang[0] == '-'
2196                 if discard:
2197                     lang = lang[1:]
2198                 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2199                 if discard:
2200                     for lang in current_langs:
2201                         requested_langs.discard(lang)
2202                 else:
2203                     requested_langs.update(current_langs)
2204         elif 'en' in available_subs:
2205             requested_langs = ['en']
2206         else:
2207             requested_langs = [list(all_sub_langs)[0]]
2208         self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2209
2210         formats_query = self.params.get('subtitlesformat', 'best')
2211         formats_preference = formats_query.split('/') if formats_query else []
2212         subs = {}
2213         for lang in requested_langs:
2214             formats = available_subs.get(lang)
2215             if formats is None:
2216                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2217                 continue
2218             for ext in formats_preference:
2219                 if ext == 'best':
2220                     f = formats[-1]
2221                     break
2222                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2223                 if matches:
2224                     f = matches[-1]
2225                     break
2226             else:
2227                 f = formats[-1]
2228                 self.report_warning(
2229                     'No subtitle format found matching "%s" for language %s, '
2230                     'using %s' % (formats_query, lang, f['ext']))
2231             subs[lang] = f
2232         return subs
2233
2234     def __forced_printings(self, info_dict, filename, incomplete):
2235         def print_mandatory(field, actual_field=None):
2236             if actual_field is None:
2237                 actual_field = field
2238             if (self.params.get('force%s' % field, False)
2239                     and (not incomplete or info_dict.get(actual_field) is not None)):
2240                 self.to_stdout(info_dict[actual_field])
2241
2242         def print_optional(field):
2243             if (self.params.get('force%s' % field, False)
2244                     and info_dict.get(field) is not None):
2245                 self.to_stdout(info_dict[field])
2246
2247         info_dict = info_dict.copy()
2248         if filename is not None:
2249             info_dict['filename'] = filename
2250         if info_dict.get('requested_formats') is not None:
2251             # For RTMP URLs, also include the playpath
2252             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2253         elif 'url' in info_dict:
2254             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2255
2256         for tmpl in self.params.get('forceprint', []):
2257             if re.match(r'\w+$', tmpl):
2258                 tmpl = '%({})s'.format(tmpl)
2259             tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2260             self.to_stdout(tmpl % info_copy)
2261
2262         print_mandatory('title')
2263         print_mandatory('id')
2264         print_mandatory('url', 'urls')
2265         print_optional('thumbnail')
2266         print_optional('description')
2267         print_optional('filename')
2268         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2269             self.to_stdout(formatSeconds(info_dict['duration']))
2270         print_mandatory('format')
2271
2272         if self.params.get('forcejson', False):
2273             self.post_extract(info_dict)
2274             self.to_stdout(json.dumps(info_dict, default=repr))
2275
2276     def dl(self, name, info, subtitle=False, test=False):
2277
2278         if test:
2279             verbose = self.params.get('verbose')
2280             params = {
2281                 'test': True,
2282                 'quiet': not verbose,
2283                 'verbose': verbose,
2284                 'noprogress': not verbose,
2285                 'nopart': True,
2286                 'skip_unavailable_fragments': False,
2287                 'keep_fragments': False,
2288                 'overwrites': True,
2289                 '_no_ytdl_file': True,
2290             }
2291         else:
2292             params = self.params
2293         fd = get_suitable_downloader(info, params)(self, params)
2294         if not test:
2295             for ph in self._progress_hooks:
2296                 fd.add_progress_hook(ph)
2297             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2298             self.write_debug('Invoking downloader on "%s"' % urls)
2299         new_info = dict(info)
2300         if new_info.get('http_headers') is None:
2301             new_info['http_headers'] = self._calc_headers(new_info)
2302         return fd.download(name, new_info, subtitle)
2303
2304     def process_info(self, info_dict):
2305         """Process a single resolved IE result."""
2306
2307         assert info_dict.get('_type', 'video') == 'video'
2308
2309         info_dict.setdefault('__postprocessors', [])
2310
2311         max_downloads = self.params.get('max_downloads')
2312         if max_downloads is not None:
2313             if self._num_downloads >= int(max_downloads):
2314                 raise MaxDownloadsReached()
2315
2316         # TODO: backward compatibility, to be removed
2317         info_dict['fulltitle'] = info_dict['title']
2318
2319         if 'format' not in info_dict:
2320             info_dict['format'] = info_dict['ext']
2321
2322         if self._match_entry(info_dict) is not None:
2323             return
2324
2325         self.post_extract(info_dict)
2326         self._num_downloads += 1
2327
2328         # info_dict['_filename'] needs to be set for backward compatibility
2329         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2330         temp_filename = self.prepare_filename(info_dict, 'temp')
2331         files_to_move = {}
2332
2333         # Forced printings
2334         self.__forced_printings(info_dict, full_filename, incomplete=False)
2335
2336         if self.params.get('simulate', False):
2337             if self.params.get('force_write_download_archive', False):
2338                 self.record_download_archive(info_dict)
2339
2340             # Do nothing else if in simulate mode
2341             return
2342
2343         if full_filename is None:
2344             return
2345
2346         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2347             return
2348         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2349             return
2350
2351         if self.params.get('writedescription', False):
2352             descfn = self.prepare_filename(info_dict, 'description')
2353             if not self._ensure_dir_exists(encodeFilename(descfn)):
2354                 return
2355             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2356                 self.to_screen('[info] Video description is already present')
2357             elif info_dict.get('description') is None:
2358                 self.report_warning('There\'s no description to write.')
2359             else:
2360                 try:
2361                     self.to_screen('[info] Writing video description to: ' + descfn)
2362                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2363                         descfile.write(info_dict['description'])
2364                 except (OSError, IOError):
2365                     self.report_error('Cannot write description file ' + descfn)
2366                     return
2367
2368         if self.params.get('writeannotations', False):
2369             annofn = self.prepare_filename(info_dict, 'annotation')
2370             if not self._ensure_dir_exists(encodeFilename(annofn)):
2371                 return
2372             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2373                 self.to_screen('[info] Video annotations are already present')
2374             elif not info_dict.get('annotations'):
2375                 self.report_warning('There are no annotations to write.')
2376             else:
2377                 try:
2378                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2379                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2380                         annofile.write(info_dict['annotations'])
2381                 except (KeyError, TypeError):
2382                     self.report_warning('There are no annotations to write.')
2383                 except (OSError, IOError):
2384                     self.report_error('Cannot write annotations file: ' + annofn)
2385                     return
2386
2387         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2388                                        self.params.get('writeautomaticsub')])
2389
2390         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2391             # subtitles download errors are already managed as troubles in relevant IE
2392             # that way it will silently go on when used with unsupporting IE
2393             subtitles = info_dict['requested_subtitles']
2394             # ie = self.get_info_extractor(info_dict['extractor_key'])
2395             for sub_lang, sub_info in subtitles.items():
2396                 sub_format = sub_info['ext']
2397                 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2398                 sub_filename_final = subtitles_filename(
2399                     self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2400                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2401                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2402                     sub_info['filepath'] = sub_filename
2403                     files_to_move[sub_filename] = sub_filename_final
2404                 else:
2405                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2406                     if sub_info.get('data') is not None:
2407                         try:
2408                             # Use newline='' to prevent conversion of newline characters
2409                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2410                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2411                                 subfile.write(sub_info['data'])
2412                             sub_info['filepath'] = sub_filename
2413                             files_to_move[sub_filename] = sub_filename_final
2414                         except (OSError, IOError):
2415                             self.report_error('Cannot write subtitles file ' + sub_filename)
2416                             return
2417                     else:
2418                         try:
2419                             self.dl(sub_filename, sub_info.copy(), subtitle=True)
2420                             sub_info['filepath'] = sub_filename
2421                             files_to_move[sub_filename] = sub_filename_final
2422                         except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2423                             self.report_warning('Unable to download subtitle for "%s": %s' %
2424                                                 (sub_lang, error_to_compat_str(err)))
2425                             continue
2426
2427         if self.params.get('writeinfojson', False):
2428             infofn = self.prepare_filename(info_dict, 'infojson')
2429             if not self._ensure_dir_exists(encodeFilename(infofn)):
2430                 return
2431             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2432                 self.to_screen('[info] Video metadata is already present')
2433             else:
2434                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2435                 try:
2436                     write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2437                 except (OSError, IOError):
2438                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2439                     return
2440             info_dict['__infojson_filename'] = infofn
2441
2442         for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2443             thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2444             thumb_filename = replace_extension(
2445                 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2446             files_to_move[thumb_filename_temp] = thumb_filename
2447
2448         # Write internet shortcut files
2449         url_link = webloc_link = desktop_link = False
2450         if self.params.get('writelink', False):
2451             if sys.platform == "darwin":  # macOS.
2452                 webloc_link = True
2453             elif sys.platform.startswith("linux"):
2454                 desktop_link = True
2455             else:  # if sys.platform in ['win32', 'cygwin']:
2456                 url_link = True
2457         if self.params.get('writeurllink', False):
2458             url_link = True
2459         if self.params.get('writewebloclink', False):
2460             webloc_link = True
2461         if self.params.get('writedesktoplink', False):
2462             desktop_link = True
2463
2464         if url_link or webloc_link or desktop_link:
2465             if 'webpage_url' not in info_dict:
2466                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2467                 return
2468             ascii_url = iri_to_uri(info_dict['webpage_url'])
2469
2470         def _write_link_file(extension, template, newline, embed_filename):
2471             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2472             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2473                 self.to_screen('[info] Internet shortcut is already present')
2474             else:
2475                 try:
2476                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2477                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2478                         template_vars = {'url': ascii_url}
2479                         if embed_filename:
2480                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2481                         linkfile.write(template % template_vars)
2482                 except (OSError, IOError):
2483                     self.report_error('Cannot write internet shortcut ' + linkfn)
2484                     return False
2485             return True
2486
2487         if url_link:
2488             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2489                 return
2490         if webloc_link:
2491             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2492                 return
2493         if desktop_link:
2494             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2495                 return
2496
2497         try:
2498             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2499         except PostProcessingError as err:
2500             self.report_error('Preprocessing: %s' % str(err))
2501             return
2502
2503         must_record_download_archive = False
2504         if self.params.get('skip_download', False):
2505             info_dict['filepath'] = temp_filename
2506             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2507             info_dict['__files_to_move'] = files_to_move
2508             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2509         else:
2510             # Download
2511             try:
2512
2513                 def existing_file(*filepaths):
2514                     ext = info_dict.get('ext')
2515                     final_ext = self.params.get('final_ext', ext)
2516                     existing_files = []
2517                     for file in orderedSet(filepaths):
2518                         if final_ext != ext:
2519                             converted = replace_extension(file, final_ext, ext)
2520                             if os.path.exists(encodeFilename(converted)):
2521                                 existing_files.append(converted)
2522                         if os.path.exists(encodeFilename(file)):
2523                             existing_files.append(file)
2524
2525                     if not existing_files or self.params.get('overwrites', False):
2526                         for file in orderedSet(existing_files):
2527                             self.report_file_delete(file)
2528                             os.remove(encodeFilename(file))
2529                         return None
2530
2531                     self.report_file_already_downloaded(existing_files[0])
2532                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2533                     return existing_files[0]
2534
2535                 success = True
2536                 if info_dict.get('requested_formats') is not None:
2537
2538                     def compatible_formats(formats):
2539                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2540                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2541                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2542                         if len(video_formats) > 2 or len(audio_formats) > 2:
2543                             return False
2544
2545                         # Check extension
2546                         exts = set(format.get('ext') for format in formats)
2547                         COMPATIBLE_EXTS = (
2548                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2549                             set(('webm',)),
2550                         )
2551                         for ext_sets in COMPATIBLE_EXTS:
2552                             if ext_sets.issuperset(exts):
2553                                 return True
2554                         # TODO: Check acodec/vcodec
2555                         return False
2556
2557                     requested_formats = info_dict['requested_formats']
2558                     old_ext = info_dict['ext']
2559                     if self.params.get('merge_output_format') is None:
2560                         if not compatible_formats(requested_formats):
2561                             info_dict['ext'] = 'mkv'
2562                             self.report_warning(
2563                                 'Requested formats are incompatible for merge and will be merged into mkv.')
2564                         if (info_dict['ext'] == 'webm'
2565                                 and self.params.get('writethumbnail', False)
2566                                 and info_dict.get('thumbnails')):
2567                             info_dict['ext'] = 'mkv'
2568                             self.report_warning(
2569                                 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
2570
2571                     def correct_ext(filename):
2572                         filename_real_ext = os.path.splitext(filename)[1][1:]
2573                         filename_wo_ext = (
2574                             os.path.splitext(filename)[0]
2575                             if filename_real_ext == old_ext
2576                             else filename)
2577                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2578
2579                     # Ensure filename always has a correct extension for successful merge
2580                     full_filename = correct_ext(full_filename)
2581                     temp_filename = correct_ext(temp_filename)
2582                     dl_filename = existing_file(full_filename, temp_filename)
2583                     info_dict['__real_download'] = False
2584
2585                     _protocols = set(determine_protocol(f) for f in requested_formats)
2586                     if len(_protocols) == 1:
2587                         info_dict['protocol'] = _protocols.pop()
2588                     directly_mergable = (
2589                         'no-direct-merge' not in self.params.get('compat_opts', [])
2590                         and info_dict.get('protocol') is not None  # All requested formats have same protocol
2591                         and not self.params.get('allow_unplayable_formats')
2592                         and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2593                     if directly_mergable:
2594                         info_dict['url'] = requested_formats[0]['url']
2595                         # Treat it as a single download
2596                         dl_filename = existing_file(full_filename, temp_filename)
2597                         if dl_filename is None:
2598                             success, real_download = self.dl(temp_filename, info_dict)
2599                             info_dict['__real_download'] = real_download
2600                     else:
2601                         downloaded = []
2602                         merger = FFmpegMergerPP(self)
2603                         if self.params.get('allow_unplayable_formats'):
2604                             self.report_warning(
2605                                 'You have requested merging of multiple formats '
2606                                 'while also allowing unplayable formats to be downloaded. '
2607                                 'The formats won\'t be merged to prevent data corruption.')
2608                         elif not merger.available:
2609                             self.report_warning(
2610                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2611                                 'The formats won\'t be merged.')
2612
2613                         if dl_filename is None:
2614                             for f in requested_formats:
2615                                 new_info = dict(info_dict)
2616                                 del new_info['requested_formats']
2617                                 new_info.update(f)
2618                                 fname = prepend_extension(
2619                                     self.prepare_filename(new_info, 'temp'),
2620                                     'f%s' % f['format_id'], new_info['ext'])
2621                                 if not self._ensure_dir_exists(fname):
2622                                     return
2623                                 downloaded.append(fname)
2624                                 partial_success, real_download = self.dl(fname, new_info)
2625                                 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2626                                 success = success and partial_success
2627                             if merger.available and not self.params.get('allow_unplayable_formats'):
2628                                 info_dict['__postprocessors'].append(merger)
2629                                 info_dict['__files_to_merge'] = downloaded
2630                                 # Even if there were no downloads, it is being merged only now
2631                                 info_dict['__real_download'] = True
2632                             else:
2633                                 for file in downloaded:
2634                                     files_to_move[file] = None
2635                 else:
2636                     # Just a single file
2637                     dl_filename = existing_file(full_filename, temp_filename)
2638                     if dl_filename is None:
2639                         success, real_download = self.dl(temp_filename, info_dict)
2640                         info_dict['__real_download'] = real_download
2641
2642                 dl_filename = dl_filename or temp_filename
2643                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2644
2645             except network_exceptions as err:
2646                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2647                 return
2648             except (OSError, IOError) as err:
2649                 raise UnavailableVideoError(err)
2650             except (ContentTooShortError, ) as err:
2651                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2652                 return
2653
2654             if success and full_filename != '-':
2655                 # Fixup content
2656                 fixup_policy = self.params.get('fixup')
2657                 if fixup_policy is None:
2658                     fixup_policy = 'detect_or_warn'
2659
2660                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2661
2662                 stretched_ratio = info_dict.get('stretched_ratio')
2663                 if stretched_ratio is not None and stretched_ratio != 1:
2664                     if fixup_policy == 'warn':
2665                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2666                             info_dict['id'], stretched_ratio))
2667                     elif fixup_policy == 'detect_or_warn':
2668                         stretched_pp = FFmpegFixupStretchedPP(self)
2669                         if stretched_pp.available:
2670                             info_dict['__postprocessors'].append(stretched_pp)
2671                         else:
2672                             self.report_warning(
2673                                 '%s: Non-uniform pixel ratio (%s). %s'
2674                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2675                     else:
2676                         assert fixup_policy in ('ignore', 'never')
2677
2678                 if (info_dict.get('requested_formats') is None
2679                         and info_dict.get('container') == 'm4a_dash'
2680                         and info_dict.get('ext') == 'm4a'):
2681                     if fixup_policy == 'warn':
2682                         self.report_warning(
2683                             '%s: writing DASH m4a. '
2684                             'Only some players support this container.'
2685                             % info_dict['id'])
2686                     elif fixup_policy == 'detect_or_warn':
2687                         fixup_pp = FFmpegFixupM4aPP(self)
2688                         if fixup_pp.available:
2689                             info_dict['__postprocessors'].append(fixup_pp)
2690                         else:
2691                             self.report_warning(
2692                                 '%s: writing DASH m4a. '
2693                                 'Only some players support this container. %s'
2694                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2695                     else:
2696                         assert fixup_policy in ('ignore', 'never')
2697
2698                 if ('protocol' in info_dict
2699                         and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
2700                     if fixup_policy == 'warn':
2701                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2702                             info_dict['id']))
2703                     elif fixup_policy == 'detect_or_warn':
2704                         fixup_pp = FFmpegFixupM3u8PP(self)
2705                         if fixup_pp.available:
2706                             info_dict['__postprocessors'].append(fixup_pp)
2707                         else:
2708                             self.report_warning(
2709                                 '%s: malformed AAC bitstream detected. %s'
2710                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2711                     else:
2712                         assert fixup_policy in ('ignore', 'never')
2713
2714                 try:
2715                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2716                 except PostProcessingError as err:
2717                     self.report_error('Postprocessing: %s' % str(err))
2718                     return
2719                 try:
2720                     for ph in self._post_hooks:
2721                         ph(info_dict['filepath'])
2722                 except Exception as err:
2723                     self.report_error('post hooks: %s' % str(err))
2724                     return
2725                 must_record_download_archive = True
2726
2727         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2728             self.record_download_archive(info_dict)
2729         max_downloads = self.params.get('max_downloads')
2730         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2731             raise MaxDownloadsReached()
2732
2733     def download(self, url_list):
2734         """Download a given list of URLs."""
2735         outtmpl = self.outtmpl_dict['default']
2736         if (len(url_list) > 1
2737                 and outtmpl != '-'
2738                 and '%' not in outtmpl
2739                 and self.params.get('max_downloads') != 1):
2740             raise SameFileError(outtmpl)
2741
2742         for url in url_list:
2743             try:
2744                 # It also downloads the videos
2745                 res = self.extract_info(
2746                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2747             except UnavailableVideoError:
2748                 self.report_error('unable to download video')
2749             except MaxDownloadsReached:
2750                 self.to_screen('[info] Maximum number of downloaded files reached')
2751                 raise
2752             except ExistingVideoReached:
2753                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2754                 raise
2755             except RejectedVideoReached:
2756                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2757                 raise
2758             else:
2759                 if self.params.get('dump_single_json', False):
2760                     self.post_extract(res)
2761                     self.to_stdout(json.dumps(res, default=repr))
2762
2763         return self._download_retcode
2764
2765     def download_with_info_file(self, info_filename):
2766         with contextlib.closing(fileinput.FileInput(
2767                 [info_filename], mode='r',
2768                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2769             # FileInput doesn't have a read method, we can't call json.load
2770             info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2771         try:
2772             self.process_ie_result(info, download=True)
2773         except (DownloadError, EntryNotInPlaylist):
2774             webpage_url = info.get('webpage_url')
2775             if webpage_url is not None:
2776                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2777                 return self.download([webpage_url])
2778             else:
2779                 raise
2780         return self._download_retcode
2781
2782     @staticmethod
2783     def filter_requested_info(info_dict, actually_filter=True):
2784         remove_keys = ['__original_infodict']  # Always remove this since this may contain a copy of the entire dict
2785         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
2786         if actually_filter:
2787             remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2788             empty_values = (None, {}, [], set(), tuple())
2789             reject = lambda k, v: k not in keep_keys and (
2790                 k.startswith('_') or k in remove_keys or v in empty_values)
2791         else:
2792             info_dict['epoch'] = int(time.time())
2793             reject = lambda k, v: k in remove_keys
2794         filter_fn = lambda obj: (
2795             list(map(filter_fn, obj)) if isinstance(obj, (list, tuple, set))
2796             else obj if not isinstance(obj, dict)
2797             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2798         return filter_fn(info_dict)
2799
2800     def run_pp(self, pp, infodict):
2801         files_to_delete = []
2802         if '__files_to_move' not in infodict:
2803             infodict['__files_to_move'] = {}
2804         files_to_delete, infodict = pp.run(infodict)
2805         if not files_to_delete:
2806             return infodict
2807
2808         if self.params.get('keepvideo', False):
2809             for f in files_to_delete:
2810                 infodict['__files_to_move'].setdefault(f, '')
2811         else:
2812             for old_filename in set(files_to_delete):
2813                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2814                 try:
2815                     os.remove(encodeFilename(old_filename))
2816                 except (IOError, OSError):
2817                     self.report_warning('Unable to remove downloaded original file')
2818                 if old_filename in infodict['__files_to_move']:
2819                     del infodict['__files_to_move'][old_filename]
2820         return infodict
2821
2822     @staticmethod
2823     def post_extract(info_dict):
2824         def actual_post_extract(info_dict):
2825             if info_dict.get('_type') in ('playlist', 'multi_video'):
2826                 for video_dict in info_dict.get('entries', {}):
2827                     actual_post_extract(video_dict or {})
2828                 return
2829
2830             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2831             extra = post_extractor().items()
2832             info_dict.update(extra)
2833             info_dict.pop('__post_extractor', None)
2834
2835             original_infodict = info_dict.get('__original_infodict') or {}
2836             original_infodict.update(extra)
2837             original_infodict.pop('__post_extractor', None)
2838
2839         actual_post_extract(info_dict or {})
2840
2841     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2842         info = dict(ie_info)
2843         info['__files_to_move'] = files_to_move or {}
2844         for pp in self._pps[key]:
2845             info = self.run_pp(pp, info)
2846         return info, info.pop('__files_to_move', None)
2847
2848     def post_process(self, filename, ie_info, files_to_move=None):
2849         """Run all the postprocessors on the given file."""
2850         info = dict(ie_info)
2851         info['filepath'] = filename
2852         info['__files_to_move'] = files_to_move or {}
2853
2854         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2855             info = self.run_pp(pp, info)
2856         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2857         del info['__files_to_move']
2858         for pp in self._pps['after_move']:
2859             info = self.run_pp(pp, info)
2860         return info
2861
2862     def _make_archive_id(self, info_dict):
2863         video_id = info_dict.get('id')
2864         if not video_id:
2865             return
2866         # Future-proof against any change in case
2867         # and backwards compatibility with prior versions
2868         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2869         if extractor is None:
2870             url = str_or_none(info_dict.get('url'))
2871             if not url:
2872                 return
2873             # Try to find matching extractor for the URL and take its ie_key
2874             for ie in self._ies:
2875                 if ie.suitable(url):
2876                     extractor = ie.ie_key()
2877                     break
2878             else:
2879                 return
2880         return '%s %s' % (extractor.lower(), video_id)
2881
2882     def in_download_archive(self, info_dict):
2883         fn = self.params.get('download_archive')
2884         if fn is None:
2885             return False
2886
2887         vid_id = self._make_archive_id(info_dict)
2888         if not vid_id:
2889             return False  # Incomplete video information
2890
2891         return vid_id in self.archive
2892
2893     def record_download_archive(self, info_dict):
2894         fn = self.params.get('download_archive')
2895         if fn is None:
2896             return
2897         vid_id = self._make_archive_id(info_dict)
2898         assert vid_id
2899         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2900             archive_file.write(vid_id + '\n')
2901         self.archive.add(vid_id)
2902
2903     @staticmethod
2904     def format_resolution(format, default='unknown'):
2905         if format.get('vcodec') == 'none':
2906             return 'audio only'
2907         if format.get('resolution') is not None:
2908             return format['resolution']
2909         if format.get('width') and format.get('height'):
2910             res = '%dx%d' % (format['width'], format['height'])
2911         elif format.get('height'):
2912             res = '%sp' % format['height']
2913         elif format.get('width'):
2914             res = '%dx?' % format['width']
2915         else:
2916             res = default
2917         return res
2918
2919     def _format_note(self, fdict):
2920         res = ''
2921         if fdict.get('ext') in ['f4f', 'f4m']:
2922             res += '(unsupported) '
2923         if fdict.get('language'):
2924             if res:
2925                 res += ' '
2926             res += '[%s] ' % fdict['language']
2927         if fdict.get('format_note') is not None:
2928             res += fdict['format_note'] + ' '
2929         if fdict.get('tbr') is not None:
2930             res += '%4dk ' % fdict['tbr']
2931         if fdict.get('container') is not None:
2932             if res:
2933                 res += ', '
2934             res += '%s container' % fdict['container']
2935         if (fdict.get('vcodec') is not None
2936                 and fdict.get('vcodec') != 'none'):
2937             if res:
2938                 res += ', '
2939             res += fdict['vcodec']
2940             if fdict.get('vbr') is not None:
2941                 res += '@'
2942         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2943             res += 'video@'
2944         if fdict.get('vbr') is not None:
2945             res += '%4dk' % fdict['vbr']
2946         if fdict.get('fps') is not None:
2947             if res:
2948                 res += ', '
2949             res += '%sfps' % fdict['fps']
2950         if fdict.get('acodec') is not None:
2951             if res:
2952                 res += ', '
2953             if fdict['acodec'] == 'none':
2954                 res += 'video only'
2955             else:
2956                 res += '%-5s' % fdict['acodec']
2957         elif fdict.get('abr') is not None:
2958             if res:
2959                 res += ', '
2960             res += 'audio'
2961         if fdict.get('abr') is not None:
2962             res += '@%3dk' % fdict['abr']
2963         if fdict.get('asr') is not None:
2964             res += ' (%5dHz)' % fdict['asr']
2965         if fdict.get('filesize') is not None:
2966             if res:
2967                 res += ', '
2968             res += format_bytes(fdict['filesize'])
2969         elif fdict.get('filesize_approx') is not None:
2970             if res:
2971                 res += ', '
2972             res += '~' + format_bytes(fdict['filesize_approx'])
2973         return res
2974
2975     def _format_note_table(self, f):
2976         def join_fields(*vargs):
2977             return ', '.join((val for val in vargs if val != ''))
2978
2979         return join_fields(
2980             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2981             format_field(f, 'language', '[%s]'),
2982             format_field(f, 'format_note'),
2983             format_field(f, 'container', ignore=(None, f.get('ext'))),
2984             format_field(f, 'asr', '%5dHz'))
2985
2986     def list_formats(self, info_dict):
2987         formats = info_dict.get('formats', [info_dict])
2988         new_format = (
2989             'list-formats' not in self.params.get('compat_opts', [])
2990             and self.params.get('list_formats_as_table', True) is not False)
2991         if new_format:
2992             table = [
2993                 [
2994                     format_field(f, 'format_id'),
2995                     format_field(f, 'ext'),
2996                     self.format_resolution(f),
2997                     format_field(f, 'fps', '%d'),
2998                     '|',
2999                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3000                     format_field(f, 'tbr', '%4dk'),
3001                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3002                     '|',
3003                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3004                     format_field(f, 'vbr', '%4dk'),
3005                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3006                     format_field(f, 'abr', '%3dk'),
3007                     format_field(f, 'asr', '%5dHz'),
3008                     self._format_note_table(f)]
3009                 for f in formats
3010                 if f.get('preference') is None or f['preference'] >= -1000]
3011             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
3012                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
3013         else:
3014             table = [
3015                 [
3016                     format_field(f, 'format_id'),
3017                     format_field(f, 'ext'),
3018                     self.format_resolution(f),
3019                     self._format_note(f)]
3020                 for f in formats
3021                 if f.get('preference') is None or f['preference'] >= -1000]
3022             header_line = ['format code', 'extension', 'resolution', 'note']
3023
3024         self.to_screen(
3025             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
3026                 header_line,
3027                 table,
3028                 delim=new_format,
3029                 extraGap=(0 if new_format else 1),
3030                 hideEmpty=new_format)))
3031
3032     def list_thumbnails(self, info_dict):
3033         thumbnails = info_dict.get('thumbnails')
3034         if not thumbnails:
3035             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3036             return
3037
3038         self.to_screen(
3039             '[info] Thumbnails for %s:' % info_dict['id'])
3040         self.to_screen(render_table(
3041             ['ID', 'width', 'height', 'URL'],
3042             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3043
3044     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3045         if not subtitles:
3046             self.to_screen('%s has no %s' % (video_id, name))
3047             return
3048         self.to_screen(
3049             'Available %s for %s:' % (name, video_id))
3050
3051         def _row(lang, formats):
3052             exts, names = zip(*((f['ext'], f.get('name', 'unknown')) for f in reversed(formats)))
3053             if len(set(names)) == 1:
3054                 names = [] if names[0] == 'unknown' else names[:1]
3055             return [lang, ', '.join(names), ', '.join(exts)]
3056
3057         self.to_screen(render_table(
3058             ['Language', 'Name', 'Formats'],
3059             [_row(lang, formats) for lang, formats in subtitles.items()],
3060             hideEmpty=True))
3061
3062     def urlopen(self, req):
3063         """ Start an HTTP download """
3064         if isinstance(req, compat_basestring):
3065             req = sanitized_Request(req)
3066         return self._opener.open(req, timeout=self._socket_timeout)
3067
    def print_debug_header(self):
        """Write the '[debug]' header lines describing the runtime environment.

        Does nothing unless the 'verbose' param is set. The header reports
        the encodings in use, the yt-dlp version, lazy-loading/plugin/compat
        option state, the git HEAD (if obtainable), the Python version, the
        versions of external executables and the proxy map. With the
        'call_home' param set, the public IP address is fetched and written
        too.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may be replaced by an object without an 'encoding' attribute
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        # Tag describing how the program is being run, shown next to the version
        source = (
            '(exe)' if hasattr(sys, 'frozen')
            else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
            else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
            else '')
        self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        if self.params.get('compat_opts'):
            self._write_string(
                '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
        # Best effort: report the git revision of the directory containing this
        # file; any failure (git missing, not a checkout, ...) is ignored
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            # re.match only anchors at the start, so this checks for a leading hex run
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            # sys.exc_clear() only exists on Python 2; its absence is ignored as well
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Name of the Python implementation, with the PyPy version appended when available
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        # Collect the versions of external programs; entries with a falsy version are not listed
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Merge the proxies of every opener handler that exposes a 'proxies' attribute
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            # NOTE(review): this return makes the latest-version check below
            # unreachable, so the outdated-version warning is never emitted
            return
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3157
3158     def _setup_opener(self):
3159         timeout_val = self.params.get('socket_timeout')
3160         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3161
3162         opts_cookiefile = self.params.get('cookiefile')
3163         opts_proxy = self.params.get('proxy')
3164
3165         if opts_cookiefile is None:
3166             self.cookiejar = compat_cookiejar.CookieJar()
3167         else:
3168             opts_cookiefile = expand_path(opts_cookiefile)
3169             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
3170             if os.access(opts_cookiefile, os.R_OK):
3171                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
3172
3173         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3174         if opts_proxy is not None:
3175             if opts_proxy == '':
3176                 proxies = {}
3177             else:
3178                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3179         else:
3180             proxies = compat_urllib_request.getproxies()
3181             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3182             if 'http' in proxies and 'https' not in proxies:
3183                 proxies['https'] = proxies['http']
3184         proxy_handler = PerRequestProxyHandler(proxies)
3185
3186         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3187         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3188         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3189         redirect_handler = YoutubeDLRedirectHandler()
3190         data_handler = compat_urllib_request_DataHandler()
3191
3192         # When passing our own FileHandler instance, build_opener won't add the
3193         # default FileHandler and allows us to disable the file protocol, which
3194         # can be used for malicious purposes (see
3195         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3196         file_handler = compat_urllib_request.FileHandler()
3197
3198         def file_open(*args, **kwargs):
3199             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3200         file_handler.file_open = file_open
3201
3202         opener = compat_urllib_request.build_opener(
3203             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3204
3205         # Delete the default user-agent header, which would otherwise apply in
3206         # cases where our custom HTTP handler doesn't come into play
3207         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3208         opener.addheaders = []
3209         self._opener = opener
3210
3211     def encode(self, s):
3212         if isinstance(s, bytes):
3213             return s  # Already encoded
3214
3215         try:
3216             return s.encode(self.get_encoding())
3217         except UnicodeEncodeError as err:
3218             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3219             raise
3220
3221     def get_encoding(self):
3222         encoding = self.params.get('encoding')
3223         if encoding is None:
3224             encoding = preferredencoding()
3225         return encoding
3226
3227     def _write_thumbnails(self, info_dict, filename):  # return the extensions
3228         write_all = self.params.get('write_all_thumbnails', False)
3229         thumbnails = []
3230         if write_all or self.params.get('writethumbnail', False):
3231             thumbnails = info_dict.get('thumbnails') or []
3232         multiple = write_all and len(thumbnails) > 1
3233
3234         ret = []
3235         for t in thumbnails[::1 if write_all else -1]:
3236             thumb_ext = determine_ext(t['url'], 'jpg')
3237             suffix = '%s.' % t['id'] if multiple else ''
3238             thumb_display_id = '%s ' % t['id'] if multiple else ''
3239             thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3240
3241             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3242                 ret.append(suffix + thumb_ext)
3243                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3244                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3245             else:
3246                 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3247                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3248                 try:
3249                     uf = self.urlopen(t['url'])
3250                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3251                         shutil.copyfileobj(uf, thumbf)
3252                     ret.append(suffix + thumb_ext)
3253                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3254                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3255                     t['filepath'] = thumb_filename
3256                 except network_exceptions as err:
3257                     self.report_warning('Unable to download thumbnail "%s": %s' %
3258                                         (t['url'], error_to_compat_str(err)))
3259             if ret and not write_all:
3260                 break
3261         return ret