]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
Revert "[build] Build Windows x86 version with py3.8"
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import time
24 import tokenize
25 import traceback
26 import random
27
28 from string import ascii_letters
29 from zipimport import zipimporter
30
31 from .compat import (
32 compat_basestring,
33 compat_cookiejar,
34 compat_get_terminal_size,
35 compat_kwargs,
36 compat_numeric_types,
37 compat_os_name,
38 compat_str,
39 compat_tokenize_tokenize,
40 compat_urllib_error,
41 compat_urllib_request,
42 compat_urllib_request_DataHandler,
43 )
44 from .utils import (
45 age_restricted,
46 args_to_str,
47 ContentTooShortError,
48 date_from_str,
49 DateRange,
50 DEFAULT_OUTTMPL,
51 determine_ext,
52 determine_protocol,
53 DOT_DESKTOP_LINK_TEMPLATE,
54 DOT_URL_LINK_TEMPLATE,
55 DOT_WEBLOC_LINK_TEMPLATE,
56 DownloadError,
57 encode_compat_str,
58 encodeFilename,
59 EntryNotInPlaylist,
60 error_to_compat_str,
61 ExistingVideoReached,
62 expand_path,
63 ExtractorError,
64 float_or_none,
65 format_bytes,
66 format_field,
67 STR_FORMAT_RE,
68 formatSeconds,
69 GeoRestrictedError,
70 int_or_none,
71 iri_to_uri,
72 ISO3166Utils,
73 LazyList,
74 locked_file,
75 make_dir,
76 make_HTTPS_handler,
77 MaxDownloadsReached,
78 network_exceptions,
79 orderedSet,
80 OUTTMPL_TYPES,
81 PagedList,
82 parse_filesize,
83 PerRequestProxyHandler,
84 platform_name,
85 PostProcessingError,
86 preferredencoding,
87 prepend_extension,
88 process_communicate_or_kill,
89 random_uuidv4,
90 register_socks_protocols,
91 RejectedVideoReached,
92 render_table,
93 replace_extension,
94 SameFileError,
95 sanitize_filename,
96 sanitize_path,
97 sanitize_url,
98 sanitized_Request,
99 std_headers,
100 str_or_none,
101 strftime_or_none,
102 subtitles_filename,
103 to_high_limit_path,
104 traverse_obj,
105 UnavailableVideoError,
106 url_basename,
107 version_tuple,
108 write_json_file,
109 write_string,
110 YoutubeDLCookieJar,
111 YoutubeDLCookieProcessor,
112 YoutubeDLHandler,
113 YoutubeDLRedirectHandler,
114 )
115 from .cache import Cache
116 from .extractor import (
117 gen_extractor_classes,
118 get_info_extractor,
119 _LAZY_LOADER,
120 _PLUGIN_CLASSES
121 )
122 from .extractor.openload import PhantomJSwrapper
123 from .downloader import (
124 get_suitable_downloader,
125 shorten_protocol_name
126 )
127 from .downloader.rtmp import rtmpdump_version
128 from .postprocessor import (
129 FFmpegFixupM3u8PP,
130 FFmpegFixupM4aPP,
131 FFmpegFixupStretchedPP,
132 FFmpegMergerPP,
133 FFmpegPostProcessor,
134 # FFmpegSubtitlesConvertorPP,
135 get_postprocessor,
136 MoveFilesAfterDownloadPP,
137 )
138 from .version import __version__
139
140 if compat_os_name == 'nt':
141 import ctypes
142
143
144 class YoutubeDL(object):
145 """YoutubeDL class.
146
147    YoutubeDL objects are the ones responsible for downloading the
148 actual video file and writing it to disk if the user has requested
149 it, among some other tasks. In most cases there should be one per
150 program. As, given a video URL, the downloader doesn't know how to
151 extract all the needed information, task that InfoExtractors do, it
152 has to pass the URL to one of them.
153
154 For this, YoutubeDL objects have a method that allows
155 InfoExtractors to be registered in a given order. When it is passed
156 a URL, the YoutubeDL object handles it to the first InfoExtractor it
157 finds that reports being able to handle it. The InfoExtractor extracts
158 all the information about the video or videos the URL refers to, and
159 YoutubeDL process the extracted information, possibly using a File
160 Downloader to download the video.
161
162 YoutubeDL objects accept a lot of parameters. In order not to saturate
163 the object constructor with arguments, it receives a dictionary of
164 options instead. These options are available through the params
165 attribute for the InfoExtractors to use. The YoutubeDL also
166 registers itself as the downloader in charge for the InfoExtractors
167 that are added to it, so this is a "mutual registration".
168
169 Available options:
170
171 username: Username for authentication purposes.
172 password: Password for authentication purposes.
173 videopassword: Password for accessing a video.
174 ap_mso: Adobe Pass multiple-system operator identifier.
175 ap_username: Multiple-system operator account username.
176 ap_password: Multiple-system operator account password.
177 usenetrc: Use netrc for authentication instead.
178 verbose: Print additional info to stdout.
179 quiet: Do not print messages to stdout.
180 no_warnings: Do not print out anything for warnings.
181 forceprint: A list of templates to force print
182 forceurl: Force printing final URL. (Deprecated)
183 forcetitle: Force printing title. (Deprecated)
184 forceid: Force printing ID. (Deprecated)
185 forcethumbnail: Force printing thumbnail URL. (Deprecated)
186 forcedescription: Force printing description. (Deprecated)
187 forcefilename: Force printing final filename. (Deprecated)
188 forceduration: Force printing duration. (Deprecated)
189 forcejson: Force printing info_dict as JSON.
190 dump_single_json: Force printing the info_dict of the whole playlist
191 (or video) as a single JSON line.
192 force_write_download_archive: Force writing download archive regardless
193 of 'skip_download' or 'simulate'.
194 simulate: Do not download the video files.
195 format: Video format code. see "FORMAT SELECTION" for more details.
196 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
197    ignore_no_formats_error: Ignore "No video formats" error. Useful for
198 extracting metadata even if the video is not actually
199 available for download (experimental)
200 format_sort: How to sort the video formats. see "Sorting Formats"
201 for more details.
202 format_sort_force: Force the given format_sort. see "Sorting Formats"
203 for more details.
204 allow_multiple_video_streams: Allow multiple video streams to be merged
205 into a single file
206 allow_multiple_audio_streams: Allow multiple audio streams to be merged
207 into a single file
208 paths: Dictionary of output paths. The allowed keys are 'home'
209 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
210 outtmpl: Dictionary of templates for output names. Allowed keys
211 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
212                        A string is also accepted for backward compatibility
213 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
214 restrictfilenames: Do not allow "&" and spaces in file names
215 trim_file_name: Limit length of filename (extension excluded)
216 windowsfilenames: Force the filenames to be windows compatible
217 ignoreerrors: Do not stop on download errors
218 (Default True when running yt-dlp,
219 but False when directly accessing YoutubeDL class)
220 skip_playlist_after_errors: Number of allowed failures until the rest of
221 the playlist is skipped
222 force_generic_extractor: Force downloader to use the generic extractor
223 overwrites: Overwrite all video and metadata files if True,
224 overwrite only non-video files if None
225 and don't overwrite any file if False
226 playliststart: Playlist item to start at.
227 playlistend: Playlist item to end at.
228 playlist_items: Specific indices of playlist to download.
229 playlistreverse: Download playlist items in reverse order.
230 playlistrandom: Download playlist items in random order.
231 matchtitle: Download only matching titles.
232 rejecttitle: Reject downloads for matching titles.
233 logger: Log messages to a logging.Logger instance.
234 logtostderr: Log messages to stderr instead of stdout.
235 writedescription: Write the video description to a .description file
236 writeinfojson: Write the video description to a .info.json file
237 clean_infojson: Remove private fields from the infojson
238 writecomments: Extract video comments. This will not be written to disk
239 unless writeinfojson is also given
240 writeannotations: Write the video annotations to a .annotations.xml file
241 writethumbnail: Write the thumbnail image to a file
242 allow_playlist_files: Whether to write playlists' description, infojson etc
243 also to disk when using the 'write*' options
244 write_all_thumbnails: Write all thumbnail formats to files
245 writelink: Write an internet shortcut file, depending on the
246 current platform (.url/.webloc/.desktop)
247 writeurllink: Write a Windows internet shortcut file (.url)
248 writewebloclink: Write a macOS internet shortcut file (.webloc)
249 writedesktoplink: Write a Linux internet shortcut file (.desktop)
250 writesubtitles: Write the video subtitles to a file
251 writeautomaticsub: Write the automatically generated subtitles to a file
252 allsubtitles: Deprecated - Use subtitlelangs = ['all']
253 Downloads all the subtitles of the video
254 (requires writesubtitles or writeautomaticsub)
255 listsubtitles: Lists all available subtitles for the video
256 subtitlesformat: The format code for subtitles
257 subtitleslangs: List of languages of the subtitles to download (can be regex).
258 The list may contain "all" to refer to all the available
259 subtitles. The language can be prefixed with a "-" to
260 exclude it from the requested languages. Eg: ['all', '-live_chat']
261 keepvideo: Keep the video file after post-processing
262 daterange: A DateRange object, download only if the upload_date is in the range.
263 skip_download: Skip the actual download of the video file
264 cachedir: Location of the cache files in the filesystem.
265 False to disable filesystem cache.
266 noplaylist: Download single video instead of a playlist if in doubt.
267 age_limit: An integer representing the user's age in years.
268 Unsuitable videos for the given age are skipped.
269 min_views: An integer representing the minimum view count the video
270 must have in order to not be skipped.
271 Videos without view count information are always
272 downloaded. None for no limit.
273 max_views: An integer representing the maximum view count.
274 Videos that are more popular than that are not
275 downloaded.
276 Videos without view count information are always
277 downloaded. None for no limit.
278 download_archive: File name of a file where all downloads are recorded.
279 Videos already present in the file are not downloaded
280 again.
281 break_on_existing: Stop the download process after attempting to download a
282 file that is in the archive.
283 break_on_reject: Stop the download process when encountering a video that
284 has been filtered out.
285 cookiefile: File name where cookies should be read from and dumped to
286 nocheckcertificate:Do not verify SSL certificates
287 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
288 At the moment, this is only supported by YouTube.
289 proxy: URL of the proxy server to use
290 geo_verification_proxy: URL of the proxy to use for IP address verification
291 on geo-restricted sites.
292 socket_timeout: Time to wait for unresponsive hosts, in seconds
293 bidi_workaround: Work around buggy terminals without bidirectional text
294                        support, using fribidi
295 debug_printtraffic:Print out sent and received HTTP traffic
296 include_ads: Download ads as well
297 default_search: Prepend this string if an input url is not valid.
298 'auto' for elaborate guessing
299 encoding: Use this encoding instead of the system-specified.
300 extract_flat: Do not resolve URLs, return the immediate result.
301 Pass in 'in_playlist' to only show this behavior for
302 playlist items.
303 postprocessors: A list of dictionaries, each with an entry
304 * key: The name of the postprocessor. See
305 yt_dlp/postprocessor/__init__.py for a list.
306 * when: When to run the postprocessor. Can be one of
307 pre_process|before_dl|post_process|after_move.
308 Assumed to be 'post_process' if not given
309 post_hooks: A list of functions that get called as the final step
310 for each video file, after all postprocessors have been
311 called. The filename will be passed as the only argument.
312 progress_hooks: A list of functions that get called on download
313 progress, with a dictionary with the entries
314 * status: One of "downloading", "error", or "finished".
315 Check this first and ignore unknown values.
316
317 If status is one of "downloading", or "finished", the
318 following properties may also be present:
319 * filename: The final filename (always present)
320 * tmpfilename: The filename we're currently writing to
321 * downloaded_bytes: Bytes on disk
322 * total_bytes: Size of the whole file, None if unknown
323 * total_bytes_estimate: Guess of the eventual file size,
324 None if unavailable.
325 * elapsed: The number of seconds since download started.
326 * eta: The estimated time in seconds, None if unknown
327 * speed: The download speed in bytes/second, None if
328 unknown
329 * fragment_index: The counter of the currently
330 downloaded video fragment.
331 * fragment_count: The number of fragments (= individual
332 files that will be merged)
333
334 Progress hooks are guaranteed to be called at least once
335 (with status "finished") if the download is successful.
336 merge_output_format: Extension to use when merging formats.
337 final_ext: Expected final extension; used to detect when the file was
338 already downloaded and converted. "merge_output_format" is
339 replaced by this extension when given
340 fixup: Automatically correct known faults of the file.
341 One of:
342 - "never": do nothing
343 - "warn": only emit a warning
344 - "detect_or_warn": check whether we can do anything
345 about it, warn otherwise (default)
346 source_address: Client-side IP address to bind to.
347 call_home: Boolean, true iff we are allowed to contact the
348 yt-dlp servers for debugging. (BROKEN)
349 sleep_interval_requests: Number of seconds to sleep between requests
350 during extraction
351 sleep_interval: Number of seconds to sleep before each download when
352 used alone or a lower bound of a range for randomized
353 sleep before each download (minimum possible number
354 of seconds to sleep) when used along with
355 max_sleep_interval.
356 max_sleep_interval:Upper bound of a range for randomized sleep before each
357 download (maximum possible number of seconds to sleep).
358 Must only be used along with sleep_interval.
359 Actual sleep time will be a random float from range
360 [sleep_interval; max_sleep_interval].
361 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
362 listformats: Print an overview of available video formats and exit.
363 list_thumbnails: Print a table of all thumbnails and exit.
364 match_filter: A function that gets called with the info_dict of
365 every video.
366 If it returns a message, the video is ignored.
367 If it returns None, the video is downloaded.
368 match_filter_func in utils.py is one example for this.
369 no_color: Do not emit color codes in output.
370 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
371 HTTP header
372 geo_bypass_country:
373 Two-letter ISO 3166-2 country code that will be used for
374 explicit geographic restriction bypassing via faking
375 X-Forwarded-For HTTP header
376 geo_bypass_ip_block:
377 IP range in CIDR notation that will be used similarly to
378 geo_bypass_country
379
380 The following options determine which downloader is picked:
381 external_downloader: A dictionary of protocol keys and the executable of the
382 external downloader to use for it. The allowed protocols
383 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
384 Set the value to 'native' to use the native downloader
385 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
386 or {'m3u8': 'ffmpeg'} instead.
387 Use the native HLS downloader instead of ffmpeg/avconv
388 if True, otherwise use ffmpeg/avconv if False, otherwise
389 use downloader suggested by extractor if None.
390 compat_opts: Compatibility options. See "Differences in default behavior".
391 Note that only format-sort, format-spec, no-live-chat,
392 no-attach-info-json, playlist-index, list-formats,
393 no-direct-merge, no-youtube-channel-redirect,
394 and no-youtube-unavailable-videos works when used via the API
395
396 The following parameters are not used by YoutubeDL itself, they are used by
397 the downloader (see yt_dlp/downloader/common.py):
398 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
399 noresizebuffer, retries, continuedl, noprogress, consoletitle,
400 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
401 http_chunk_size.
402
403 The following options are used by the post processors:
404 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
405 otherwise prefer ffmpeg. (avconv support is deprecated)
406 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
407 to the binary or its containing directory.
408 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
409 and a list of additional command-line arguments for the
410 postprocessor/executable. The dict can also have "PP+EXE" keys
411 which are used when the given exe is used by the given PP.
412 Use 'default' as the name for arguments to passed to all PP
413
414 The following options are used by the extractors:
415 extractor_retries: Number of times to retry for known errors
416 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
417 hls_split_discontinuity: Split HLS playlists to different formats at
418 discontinuities such as ad breaks (default: False)
419 youtube_include_dash_manifest: If True (default), DASH manifests and related
420 data will be downloaded and processed by extractor.
421 You can reduce network I/O by disabling it if you don't
422 care about DASH. (only for youtube)
423 youtube_include_hls_manifest: If True (default), HLS manifests and related
424 data will be downloaded and processed by extractor.
425 You can reduce network I/O by disabling it if you don't
426 care about HLS. (only for youtube)
427 """
428
429 _NUMERIC_FIELDS = set((
430 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
431 'timestamp', 'upload_year', 'upload_month', 'upload_day',
432 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
433 'average_rating', 'comment_count', 'age_limit',
434 'start_time', 'end_time',
435 'chapter_number', 'season_number', 'episode_number',
436 'track_number', 'disc_number', 'release_year',
437 'playlist_index',
438 ))
439
440 params = None
441 _ies = []
442 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
443 __prepare_filename_warned = False
444 _first_webpage_request = True
445 _download_retcode = None
446 _num_downloads = None
447 _playlist_level = 0
448 _playlist_urls = set()
449 _screen_file = None
450
451 def __init__(self, params=None, auto_init=True):
452 """Create a FileDownloader object with the given options."""
453 if params is None:
454 params = {}
455 self._ies = []
456 self._ies_instances = {}
457 self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
458 self.__prepare_filename_warned = False
459 self._first_webpage_request = True
460 self._post_hooks = []
461 self._progress_hooks = []
462 self._download_retcode = 0
463 self._num_downloads = 0
464 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
465 self._err_file = sys.stderr
466 self.params = {
467 # Default parameters
468 'nocheckcertificate': False,
469 }
470 self.params.update(params)
471 self.cache = Cache(self)
472
473 if sys.version_info < (3, 6):
474 self.report_warning(
475 'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
476 'Update to Python 3.6 or above' % sys.version_info[:2])
477
478 def check_deprecated(param, option, suggestion):
479 if self.params.get(param) is not None:
480 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
481 return True
482 return False
483
484 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
485 if self.params.get('geo_verification_proxy') is None:
486 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
487
488 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
489 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
490 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
491
492 for msg in self.params.get('warnings', []):
493 self.report_warning(msg)
494
495 if self.params.get('final_ext'):
496 if self.params.get('merge_output_format'):
497 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
498 self.params['merge_output_format'] = self.params['final_ext']
499
500 if 'overwrites' in self.params and self.params['overwrites'] is None:
501 del self.params['overwrites']
502
503 if params.get('bidi_workaround', False):
504 try:
505 import pty
506 master, slave = pty.openpty()
507 width = compat_get_terminal_size().columns
508 if width is None:
509 width_args = []
510 else:
511 width_args = ['-w', str(width)]
512 sp_kwargs = dict(
513 stdin=subprocess.PIPE,
514 stdout=slave,
515 stderr=self._err_file)
516 try:
517 self._output_process = subprocess.Popen(
518 ['bidiv'] + width_args, **sp_kwargs
519 )
520 except OSError:
521 self._output_process = subprocess.Popen(
522 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
523 self._output_channel = os.fdopen(master, 'rb')
524 except OSError as ose:
525 if ose.errno == errno.ENOENT:
526 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
527 else:
528 raise
529
530 if (sys.platform != 'win32'
531 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
532 and not params.get('restrictfilenames', False)):
533 # Unicode filesystem API will throw errors (#1474, #13027)
534 self.report_warning(
535 'Assuming --restrict-filenames since file system encoding '
536 'cannot encode all characters. '
537 'Set the LC_ALL environment variable to fix this.')
538 self.params['restrictfilenames'] = True
539
540 self.outtmpl_dict = self.parse_outtmpl()
541
542 self._setup_opener()
543
544 """Preload the archive, if any is specified"""
545 def preload_download_archive(fn):
546 if fn is None:
547 return False
548 self.write_debug('Loading archive file %r\n' % fn)
549 try:
550 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
551 for line in archive_file:
552 self.archive.add(line.strip())
553 except IOError as ioe:
554 if ioe.errno != errno.ENOENT:
555 raise
556 return False
557 return True
558
559 self.archive = set()
560 preload_download_archive(self.params.get('download_archive'))
561
562 if auto_init:
563 self.print_debug_header()
564 self.add_default_info_extractors()
565
566 for pp_def_raw in self.params.get('postprocessors', []):
567 pp_class = get_postprocessor(pp_def_raw['key'])
568 pp_def = dict(pp_def_raw)
569 del pp_def['key']
570 if 'when' in pp_def:
571 when = pp_def['when']
572 del pp_def['when']
573 else:
574 when = 'post_process'
575 pp = pp_class(self, **compat_kwargs(pp_def))
576 self.add_post_processor(pp, when=when)
577
578 for ph in self.params.get('post_hooks', []):
579 self.add_post_hook(ph)
580
581 for ph in self.params.get('progress_hooks', []):
582 self.add_progress_hook(ph)
583
584 register_socks_protocols()
585
586 def warn_if_short_id(self, argv):
587 # short YouTube ID starting with dash?
588 idxs = [
589 i for i, a in enumerate(argv)
590 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
591 if idxs:
592 correct_argv = (
593 ['yt-dlp']
594 + [a for i, a in enumerate(argv) if i not in idxs]
595 + ['--'] + [argv[i] for i in idxs]
596 )
597 self.report_warning(
598 'Long argument string detected. '
599 'Use -- to separate parameters and URLs, like this:\n%s\n' %
600 args_to_str(correct_argv))
601
602 def add_info_extractor(self, ie):
603 """Add an InfoExtractor object to the end of the list."""
604 self._ies.append(ie)
605 if not isinstance(ie, type):
606 self._ies_instances[ie.ie_key()] = ie
607 ie.set_downloader(self)
608
609 def get_info_extractor(self, ie_key):
610 """
611 Get an instance of an IE with name ie_key, it will try to get one from
612 the _ies list, if there's no instance it will create a new one and add
613 it to the extractor list.
614 """
615 ie = self._ies_instances.get(ie_key)
616 if ie is None:
617 ie = get_info_extractor(ie_key)()
618 self.add_info_extractor(ie)
619 return ie
620
621 def add_default_info_extractors(self):
622 """
623 Add the InfoExtractors returned by gen_extractors to the end of the list
624 """
625 for ie in gen_extractor_classes():
626 self.add_info_extractor(ie)
627
628 def add_post_processor(self, pp, when='post_process'):
629 """Add a PostProcessor object to the end of the chain."""
630 self._pps[when].append(pp)
631 pp.set_downloader(self)
632
633 def add_post_hook(self, ph):
634 """Add the post hook"""
635 self._post_hooks.append(ph)
636
637 def add_progress_hook(self, ph):
638 """Add the progress hook (currently only for the file downloader)"""
639 self._progress_hooks.append(ph)
640
641 def _bidi_workaround(self, message):
642 if not hasattr(self, '_output_channel'):
643 return message
644
645 assert hasattr(self, '_output_process')
646 assert isinstance(message, compat_str)
647 line_count = message.count('\n') + 1
648 self._output_process.stdin.write((message + '\n').encode('utf-8'))
649 self._output_process.stdin.flush()
650 res = ''.join(self._output_channel.readline().decode('utf-8')
651 for _ in range(line_count))
652 return res[:-len('\n')]
653
654 def _write_string(self, s, out=None):
655 write_string(s, out=out, encoding=self.params.get('encoding'))
656
657 def to_stdout(self, message, skip_eol=False, quiet=False):
658 """Print message to stdout"""
659 if self.params.get('logger'):
660 self.params['logger'].debug(message)
661 elif not quiet or self.params.get('verbose'):
662 self._write_string(
663 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
664 self._err_file if quiet else self._screen_file)
665
666 def to_stderr(self, message):
667 """Print message to stderr"""
668 assert isinstance(message, compat_str)
669 if self.params.get('logger'):
670 self.params['logger'].error(message)
671 else:
672 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
673
674 def to_console_title(self, message):
675 if not self.params.get('consoletitle', False):
676 return
677 if compat_os_name == 'nt':
678 if ctypes.windll.kernel32.GetConsoleWindow():
679 # c_wchar_p() might not be necessary if `message` is
680 # already of type unicode()
681 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
682 elif 'TERM' in os.environ:
683 self._write_string('\033]0;%s\007' % message, self._screen_file)
684
685 def save_console_title(self):
686 if not self.params.get('consoletitle', False):
687 return
688 if self.params.get('simulate', False):
689 return
690 if compat_os_name != 'nt' and 'TERM' in os.environ:
691 # Save the title on stack
692 self._write_string('\033[22;0t', self._screen_file)
693
694 def restore_console_title(self):
695 if not self.params.get('consoletitle', False):
696 return
697 if self.params.get('simulate', False):
698 return
699 if compat_os_name != 'nt' and 'TERM' in os.environ:
700 # Restore the title from stack
701 self._write_string('\033[23;0t', self._screen_file)
702
703 def __enter__(self):
704 self.save_console_title()
705 return self
706
707 def __exit__(self, *args):
708 self.restore_console_title()
709
710 if self.params.get('cookiefile') is not None:
711 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
712
713 def trouble(self, message=None, tb=None):
714 """Determine action to take when a download problem appears.
715
716 Depending on if the downloader has been configured to ignore
717 download errors or not, this method may throw an exception or
718 not when errors are found, after printing the message.
719
720 tb, if given, is additional traceback information.
721 """
722 if message is not None:
723 self.to_stderr(message)
724 if self.params.get('verbose'):
725 if tb is None:
726 if sys.exc_info()[0]: # if .trouble has been called from an except block
727 tb = ''
728 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
729 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
730 tb += encode_compat_str(traceback.format_exc())
731 else:
732 tb_data = traceback.format_list(traceback.extract_stack())
733 tb = ''.join(tb_data)
734 if tb:
735 self.to_stderr(tb)
736 if not self.params.get('ignoreerrors', False):
737 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
738 exc_info = sys.exc_info()[1].exc_info
739 else:
740 exc_info = sys.exc_info()
741 raise DownloadError(message, exc_info)
742 self._download_retcode = 1
743
744 def to_screen(self, message, skip_eol=False):
745 """Print message to stdout if not in quiet mode"""
746 self.to_stdout(
747 message, skip_eol, quiet=self.params.get('quiet', False))
748
749 def report_warning(self, message):
750 '''
751 Print the message to stderr, it will be prefixed with 'WARNING:'
752 If stderr is a tty file the 'WARNING:' will be colored
753 '''
754 if self.params.get('logger') is not None:
755 self.params['logger'].warning(message)
756 else:
757 if self.params.get('no_warnings'):
758 return
759 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
760 _msg_header = '\033[0;33mWARNING:\033[0m'
761 else:
762 _msg_header = 'WARNING:'
763 warning_message = '%s %s' % (_msg_header, message)
764 self.to_stderr(warning_message)
765
766 def report_error(self, message, tb=None):
767 '''
768 Do the same as trouble, but prefixes the message with 'ERROR:', colored
769 in red if stderr is a tty file.
770 '''
771 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
772 _msg_header = '\033[0;31mERROR:\033[0m'
773 else:
774 _msg_header = 'ERROR:'
775 error_message = '%s %s' % (_msg_header, message)
776 self.trouble(error_message, tb)
777
778 def write_debug(self, message):
779 '''Log debug message or Print message to stderr'''
780 if not self.params.get('verbose', False):
781 return
782 message = '[debug] %s' % message
783 if self.params.get('logger'):
784 self.params['logger'].debug(message)
785 else:
786 self._write_string('%s\n' % message)
787
788 def report_file_already_downloaded(self, file_name):
789 """Report file has already been fully downloaded."""
790 try:
791 self.to_screen('[download] %s has already been downloaded' % file_name)
792 except UnicodeEncodeError:
793 self.to_screen('[download] The file has already been downloaded')
794
795 def report_file_delete(self, file_name):
796 """Report that existing file will be deleted."""
797 try:
798 self.to_screen('Deleting existing file %s' % file_name)
799 except UnicodeEncodeError:
800 self.to_screen('Deleting existing file')
801
def parse_outtmpl(self):
    """Return the output-template dict from params, filling in any unset
    or empty keys from DEFAULT_OUTTMPL and warning about bytes templates."""
    templates = self.params.get('outtmpl', {})
    # A bare string is treated as the 'default' template
    if not isinstance(templates, dict):
        templates = {'default': templates}
    for tmpl_key, tmpl_val in DEFAULT_OUTTMPL.items():
        if not templates.get(tmpl_key):
            templates[tmpl_key] = tmpl_val
    for tmpl_val in templates.values():
        if isinstance(tmpl_val, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
    return templates
815
@staticmethod
def validate_outtmpl(tmpl):
    ''' @return None or Exception object '''
    def escape_plain_percents(mobj):
        # Re-escape lone '%' so the trial substitution below does not
        # misread them as conversion specifiers
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    try:
        # Dry-run the template against a dict that answers 0 for any key;
        # a malformed conversion spec raises ValueError
        escaped = re.sub(STR_FORMAT_RE.format(''), escape_plain_percents, tmpl)
        escaped % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
828
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)

    Returns a tuple (outtmpl, tmpl_dict): the rewritten template and the dict
    to apply it to. Each `%(...)X` group in the template is replaced by a
    synthetic key (original key + NUL + format spec) so the same field can be
    used with different conversions in one template.

    sanitize, when given, is a callable (key, value) -> value applied to every
    substituted value.
    """
    # Work on a copy: derived fields are added below
    info_dict = dict(info_dict)
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['epoch'] = int(time.time())
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    EXTERNAL_FORMAT_RE = STR_FORMAT_RE.format('[^)]*')
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
    # Inside %(...)X: optional negation, dotted field, +/- maths chain,
    # '>' strftime format and '|' default value
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{0})
        (?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
        (?:>(?P<strf_format>.+?))?
        (?:\|(?P<default>.*?))?
        $'''.format(FIELD_RE))
    MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    tmpl_dict = {}

    # Resolve a dotted field path ('a.b.0') against info_dict
    get_key = lambda k: traverse_obj(
        info_dict, k.split('.'), is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Evaluate one parsed %(...) expression to its raw value
        # Object traversal
        value = get_key(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        if mdict['maths']:
            value = float_or_none(value)
            operator = None
            # split() yields alternating operators and operands; [0] is the
            # empty prefix before the first operator
            for item in MATH_OPERATORS_RE.split(mdict['maths'])[1:]:
                if item == '' or value is None:
                    return None
                if operator:
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Operand is itself a field name, not a literal number
                        offset = float_or_none(get_key(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
                else:
                    operator = MATH_FUNCTIONS[item]
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'])

        return value

    def create_key(outer_mobj):
        # re.sub callback: rewrite one %(...)X group and record its value
        if not outer_mobj.group('has_key'):
            # A bare '%' (no key) — just re-escape it
            return '%{}'.format(outer_mobj.group(0))

        key = outer_mobj.group('key')
        fmt = outer_mobj.group('format')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        if mobj is None:
            value, default = None, na
        else:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else na
            value = get_value(mobj)

        if fmt == 's' and value is not None and key in field_size_compat_map.keys():
            # Backward-compat zero padding (see field_size_compat_map above)
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value
        # NUL-joined key+fmt keeps entries distinct per conversion spec
        key += '\0%s' % fmt

        if fmt == 'c':
            value = compat_str(value)
            if value is None:
                value, fmt = default, 's'
            else:
                value = value[0]
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'
        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), '%ss' % fmt[:-1]
            value = sanitize(key, value)
        tmpl_dict[key] = value
        return '%({key}){fmt}'.format(key=key, fmt=fmt)

    return re.sub(EXTERNAL_FORMAT_RE, create_key, outtmpl), tmpl_dict
944
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Render the output template of the given type for info_dict.

    Returns the filename string, or None after reporting an error when the
    template substitution raises ValueError.
    """
    try:
        # ID-ish fields get the less aggressive is_id sanitization
        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        # Some template types force a particular extension (e.g. infojson)
        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # NOTE(review): rsplit('.') splits on EVERY dot, and only
            # fn_groups[0] is kept besides (sub_)ext — names with several
            # dots lose their middle segments here; looks intentional-ish
            # but worth confirming
            fn_groups = filename.rsplit('.')
            ext = fn_groups[-1]
            sub_ext = ''
            if len(fn_groups) > 2:
                sub_ext = fn_groups[-2]
            filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
985
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    Renders the template for dir_type (falling back to 'default') and joins
    it with the configured 'paths' (home dir + per-type subdir). Returns the
    rendered template unchanged when it is '-' (stdout) or falsy (template
    error). With warn=True, warns once when --paths is set but ignored.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn and not self.__prepare_filename_warned:
        if not paths:
            pass
        elif filename == '-':
            # Fixed grammar of the warning ('when an outputting')
            self.report_warning('--paths is ignored when outputting to stdout')
        # Guard against filename=None (template error) — os.path.isabs(None)
        # would raise TypeError
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template')
        self.__prepare_filename_warned = True
    if filename == '-' or not filename:
        return filename

    homepath = expand_path(paths.get('home', '').strip())
    assert isinstance(homepath, compat_str)
    subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    assert isinstance(subdir, compat_str)
    path = os.path.join(homepath, subdir, filename)

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
1015
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded

    Otherwise returns a human-readable reason string for skipping it.
    Raises ExistingVideoReached/RejectedVideoReached when the corresponding
    break_on_* option is set. With silent=True the reason is not printed.
    """
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # Return a skip reason, or None if all filters pass
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        # match_filter may need fields that are absent from incomplete
        # (e.g. flat-playlist) entries, so it is only applied to full ones
        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret
        return None

    # Archive check takes precedence over the other filters
    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached
    if reason is not None:
        if not silent:
            self.to_screen('[download] ' + reason)
        if self.params.get(break_opt, False):
            raise break_err()
    return reason
1069
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    for key, value in extra_info.items():
        if key not in info_dict:
            info_dict[key] = value
1075
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True, force_generic_extractor=False):
    """
    Return a list with a dictionary for each video extracted.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
               must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to determine the video id cheaply (without extraction) so the
        # archive can be checked before hitting the network
        try:
            temp_id = str_or_none(
                ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                else ie._match_id(url))
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                ie_key, temp_id))
            # break (not return) skips the for-else below; the archived
            # video is skipped and None is returned
            break
        return self.__extract_info(url, ie, download, extra_info, process)
    else:
        # No extractor accepted the URL
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1124
def __handle_extraction_exceptions(func):
    """Decorator for extraction entry points.

    Reports expected extraction failures (geo-restriction, extractor errors)
    via report_error instead of propagating them; re-raises the flow-control
    exceptions unchanged; and for anything else honours the 'ignoreerrors'
    option. Returns None when an exception was swallowed.
    """
    # Local import: the module's top-level import block is not editable here
    import functools

    @functools.wraps(func)  # preserve func's name/docstring for debugging
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            msg = e.msg
            if e.countries:
                msg += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            # These steer overall download flow and must reach the caller
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
            else:
                raise
    return wrapper
1146
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run ie.extract(url), normalise legacy results and optionally process them."""
    result = ie.extract(url)
    if result is None:
        # Finished already (backwards compatibility; listformats and friends should be moved here)
        return None
    if isinstance(result, list):
        # Backwards compatibility: old IE result format was a bare entry list
        result = {
            '_type': 'compat_list',
            'entries': result,
        }
    self.add_default_extra_info(result, ie, url)
    if not process:
        return result
    return self.process_ie_result(result, download, extra_info)
1163
def add_default_extra_info(self, ie_result, ie, url):
    """Attach extractor identity and URL bookkeeping fields to ie_result
    (existing keys are never overwritten)."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'original_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
1172
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        # With --flat-playlist, playlist members are printed, not resolved
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            info_copy = ie_result.copy()
            self.add_extra_info(info_copy, extra_info)
            self.add_default_extra_info(
                info_copy, self.get_info_extractor(ie_result.get('ie_key')), ie_result['url'])
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataFromFieldPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Fixed precedence bug: '%' binds tighter than 'or', so the
            # original formatted a possibly-None title and the id fallback
            # was never substituted into the message
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Propagate the outer result's bookkeeping fields to each entry
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1293
def _ensure_dir_exists(self, path):
    """Create the directory for *path* if needed; failures are reported
    through report_error. Returns make_dir's result (falsy on failure)."""
    created = make_dir(path, self.report_error)
    return created
1296
def __process_playlist(self, ie_result, download):
    """Resolve and (optionally) download every entry of a playlist result.

    Applies playliststart/playlistend/playlist_items selection, writes the
    playlist-level metadata files when enabled, and processes each entry,
    aborting after 'skip_playlist_after_errors' consecutive-total failures.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indexes):
            # Fixed: max(*indexes) raised TypeError for a single requested
            # index (max(5) is invalid); max(indexes) handles any length
            ret = [None] * max(indexes)
            for i, entry in zip(indexes, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(format):
            # Expand '1-3,7' style specs into individual indices
            for string_segment in format.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    msg = (
        'Downloading %d videos' if not isinstance(ie_entries, list)
        else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
    if not isinstance(ie_entries, (list, PagedList)):
        ie_entries = LazyList(ie_entries)

    entries = []
    for i in playlistitems or itertools.count(playliststart):
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = ie_entries[i - 1]
            if entry is None:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist()
            elif not playlistitems:
                break
        entries.append(entry)
        try:
            if entry is not None:
                # Evaluate break_on_existing/break_on_reject eagerly
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        ((playlistitems[i - 1] if playlistitems else i), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not playlistitems and (playliststart or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0,
        }
        ie_copy.update(dict(ie_result))

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not self._ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not self._ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist_index' in self.params.get('compat_options', []):
            playlist_index = playlistitems[i - 1] if playlistitems else i
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
1468
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process one playlist entry; extraction errors are absorbed by the decorator."""
    return self.process_ie_result(entry, download=download, extra_info=extra_info)
1473
def _build_format_filter(self, filter_spec):
    " Returns a function to filter the formats according to the filter_spec "

    # Numeric comparisons on a fixed set of keys, e.g. 'height<=720'
    OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        $
        ''' % '|'.join(map(re.escape, OPERATORS.keys())))
    m = operator_rex.search(filter_spec)
    if m:
        try:
            comparison_value = int(m.group('value'))
        except ValueError:
            # Non-integer value: try to parse it as a size ('500K', '0.5MiB');
            # a trailing 'B' is also tolerated
            comparison_value = parse_filesize(m.group('value'))
            if comparison_value is None:
                comparison_value = parse_filesize(m.group('value') + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        m.group('value'), filter_spec))
        op = OPERATORS[m.group('op')]

    # Fall back to string comparisons, e.g. 'vcodec^=avc1' or 'format_id!=x'
    if not m:
        STR_OPERATORS = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        str_operator_rex = re.compile(r'''(?x)
            \s*(?P<key>[a-zA-Z0-9._-]+)
            \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            \s*$
            ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
        m = str_operator_rex.search(filter_spec)
        if m:
            comparison_value = m.group('value')
            str_op = STR_OPERATORS[m.group('op')]
            if m.group('negation'):
                op = lambda attr, value: not str_op(attr, value)
            else:
                op = str_op

    if not m:
        raise ValueError('Invalid filter specification %r' % filter_spec)

    def _filter(f):
        # A format missing the key passes only with the trailing '?'
        # (none-inclusive) marker in the spec
        actual_value = f.get(m.group('key'))
        if actual_value is None:
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)
    return _filter
1536
def _default_format_spec(self, info_dict, download=True):
    """Choose the default format selector string for this download.

    Falls back to pre-merged 'best' when merging is impossible (no usable
    ffmpeg, live stream, or output to stdout); otherwise picks the
    merge-based selector, in compat flavour when requested.
    """
    def merging_possible():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    prefer_best = (
        download
        and not self.params.get('simulate', False)
        and (not merging_possible()
             or info_dict.get('is_live', False)
             or self.outtmpl_dict['default'] == '-'))
    use_compat = (
        prefer_best
        or self.params.get('allow_multiple_audio_streams', False)
        or 'format-spec' in self.params.get('compat_opts', []))

    if prefer_best:
        return 'best/bestvideo+bestaudio'
    if use_compat:
        return 'bestvideo+bestaudio/best'
    return 'bestvideo*+bestaudio/best'
1559
1560 def build_format_selector(self, format_spec):
1561 def syntax_error(note, start):
1562 message = (
1563 'Invalid format specification: '
1564 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1565 return SyntaxError(message)
1566
1567 PICKFIRST = 'PICKFIRST'
1568 MERGE = 'MERGE'
1569 SINGLE = 'SINGLE'
1570 GROUP = 'GROUP'
1571 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1572
1573 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1574 'video': self.params.get('allow_multiple_video_streams', False)}
1575
1576 check_formats = self.params.get('check_formats')
1577
1578 def _parse_filter(tokens):
1579 filter_parts = []
1580 for type, string, start, _, _ in tokens:
1581 if type == tokenize.OP and string == ']':
1582 return ''.join(filter_parts)
1583 else:
1584 filter_parts.append(string)
1585
1586 def _remove_unused_ops(tokens):
1587 # Remove operators that we don't use and join them with the surrounding strings
1588 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1589 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1590 last_string, last_start, last_end, last_line = None, None, None, None
1591 for type, string, start, end, line in tokens:
1592 if type == tokenize.OP and string == '[':
1593 if last_string:
1594 yield tokenize.NAME, last_string, last_start, last_end, last_line
1595 last_string = None
1596 yield type, string, start, end, line
1597 # everything inside brackets will be handled by _parse_filter
1598 for type, string, start, end, line in tokens:
1599 yield type, string, start, end, line
1600 if type == tokenize.OP and string == ']':
1601 break
1602 elif type == tokenize.OP and string in ALLOWED_OPS:
1603 if last_string:
1604 yield tokenize.NAME, last_string, last_start, last_end, last_line
1605 last_string = None
1606 yield type, string, start, end, line
1607 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1608 if not last_string:
1609 last_string = string
1610 last_start = start
1611 last_end = end
1612 else:
1613 last_string += string
1614 if last_string:
1615 yield tokenize.NAME, last_string, last_start, last_end, last_line
1616
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            """Recursive-descent parse of a token stream into a list of
            FormatSelector trees.

            The inside_* flags tell a recursive invocation which construct it
            is parsing the interior of, i.e. which delimiter should terminate
            it (and be pushed back for the caller to consume).
            """
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    # a bare name/number is an atomic format selector
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # end of a merge operand; the outer call handles the operator
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        # fallback: pick first of (current, rest-of-expression)
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # '[...]' attaches a filter to the current selector ('best' if none)
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        # merge: current selector + the selector that follows
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors
1674
1675 def _merge(formats_pair):
1676 format_1, format_2 = formats_pair
1677
1678 formats_info = []
1679 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1680 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1681
1682 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1683 get_no_more = {"video": False, "audio": False}
1684 for (i, fmt_info) in enumerate(formats_info):
1685 for aud_vid in ["audio", "video"]:
1686 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1687 if get_no_more[aud_vid]:
1688 formats_info.pop(i)
1689 get_no_more[aud_vid] = True
1690
1691 if len(formats_info) == 1:
1692 return formats_info[0]
1693
1694 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1695 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1696
1697 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1698 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1699
1700 output_ext = self.params.get('merge_output_format')
1701 if not output_ext:
1702 if the_only_video:
1703 output_ext = the_only_video['ext']
1704 elif the_only_audio and not video_fmts:
1705 output_ext = the_only_audio['ext']
1706 else:
1707 output_ext = 'mkv'
1708
1709 new_dict = {
1710 'requested_formats': formats_info,
1711 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1712 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1713 'ext': output_ext,
1714 }
1715
1716 if the_only_video:
1717 new_dict.update({
1718 'width': the_only_video.get('width'),
1719 'height': the_only_video.get('height'),
1720 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1721 'fps': the_only_video.get('fps'),
1722 'vcodec': the_only_video.get('vcodec'),
1723 'vbr': the_only_video.get('vbr'),
1724 'stretched_ratio': the_only_video.get('stretched_ratio'),
1725 })
1726
1727 if the_only_audio:
1728 new_dict.update({
1729 'acodec': the_only_audio.get('acodec'),
1730 'abr': the_only_audio.get('abr'),
1731 })
1732
1733 return new_dict
1734
1735 def _check_formats(formats):
1736 for f in formats:
1737 self.to_screen('[info] Testing format %s' % f['format_id'])
1738 paths = self.params.get('paths', {})
1739 temp_file = os.path.join(
1740 expand_path(paths.get('home', '').strip()),
1741 expand_path(paths.get('temp', '').strip()),
1742 'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
1743 try:
1744 dl, _ = self.dl(temp_file, f, test=True)
1745 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
1746 dl = False
1747 finally:
1748 if os.path.exists(temp_file):
1749 os.remove(temp_file)
1750 if dl:
1751 yield f
1752 else:
1753 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1754
        def _build_selector_function(selector):
            """Compile a parsed FormatSelector tree (or a list of them) into a
            function mapping a context dict ({'formats', 'incomplete_formats'})
            to an iterable of the selected format dicts."""
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    # concatenate the results of all comma-separated selectors
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # first alternative that yields anything wins
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if check_formats:
                            formats = _check_formats(formats)
                        for f in formats:
                            yield f
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        # merge every working format into one, best-first
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    # best/worst selectors like b, w, bv*, ba.2, wa etc.
                    format_fallback, format_reverse, format_idx = False, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else None)  # b*, w*
                    else:
                        # otherwise the spec is a literal extension or a format_id
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if not formats:
                            return
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        if format_reverse:
                            matches = matches[::-1]
                        if check_formats:
                            matches = list(itertools.islice(_check_formats(matches), format_idx))
                        n = len(matches)
                        if -n <= format_idx - 1 < n:
                            yield matches[format_idx - 1]

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # cartesian product: merge every pair of selections
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # apply the selector's attached [..] filters on a private copy
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
1859
1860 stream = io.BytesIO(format_spec.encode('utf-8'))
1861 try:
1862 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1863 except tokenize.TokenError:
1864 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1865
1866 class TokenIterator(object):
1867 def __init__(self, tokens):
1868 self.tokens = tokens
1869 self.counter = 0
1870
1871 def __iter__(self):
1872 return self
1873
1874 def __next__(self):
1875 if self.counter >= len(self.tokens):
1876 raise StopIteration()
1877 value = self.tokens[self.counter]
1878 self.counter += 1
1879 return value
1880
1881 next = __next__
1882
1883 def restore_last_token(self):
1884 self.counter -= 1
1885
1886 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1887 return _build_selector_function(parsed_selector)
1888
1889 def _calc_headers(self, info_dict):
1890 res = std_headers.copy()
1891
1892 add_headers = info_dict.get('http_headers')
1893 if add_headers:
1894 res.update(add_headers)
1895
1896 cookies = self._calc_cookies(info_dict)
1897 if cookies:
1898 res['Cookie'] = cookies
1899
1900 if 'X-Forwarded-For' not in res:
1901 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1902 if x_forwarded_for_ip:
1903 res['X-Forwarded-For'] = x_forwarded_for_ip
1904
1905 return res
1906
1907 def _calc_cookies(self, info_dict):
1908 pr = sanitized_Request(info_dict['url'])
1909 self.cookiejar.add_cookie_header(pr)
1910 return pr.get_header('Cookie')
1911
1912 @staticmethod
1913 def _sanitize_thumbnails(info_dict):
1914 thumbnails = info_dict.get('thumbnails')
1915 if thumbnails is None:
1916 thumbnail = info_dict.get('thumbnail')
1917 if thumbnail:
1918 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1919 if thumbnails:
1920 thumbnails.sort(key=lambda t: (
1921 t.get('preference') if t.get('preference') is not None else -1,
1922 t.get('width') if t.get('width') is not None else -1,
1923 t.get('height') if t.get('height') is not None else -1,
1924 t.get('id') if t.get('id') is not None else '',
1925 t.get('url')))
1926 for i, t in enumerate(thumbnails):
1927 t['url'] = sanitize_url(t['url'])
1928 if t.get('width') and t.get('height'):
1929 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1930 if t.get('id') is None:
1931 t['id'] = '%d' % i
1932
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extracted video result, select the requested
        subtitles and formats and, when *download* is True, hand each
        selected format over to process_info().  Returns the (mutated)
        info_dict, updated with the last selected format for backward
        compatibility."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # warn that an extractor returned a wrongly-typed field
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # coerce a non-string field to str (warning about the extractor bug)
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # coerce all known numeric fields to int (warning about extractor bugs)
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # thumbnails were sorted ascending, so the last one is the best
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive the *_date fields from the corresponding timestamps
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize URLs and fill in missing extensions of all subtitle formats
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('No video formats found!')
            else:
                self.report_warning('No video formats found!')

        def is_wellformed(f):
            # a format must at least carry a non-empty URL to be usable
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats and formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self.params.get('listformats'):
            if not info_dict.get('formats'):
                raise ExtractorError('No video formats found', expected=True)
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True)
            else:
                self.report_warning('Requested format is not available')
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2171
    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format.

        Returns a dict mapping language -> chosen subtitle format dict, or
        None when subtitle writing is disabled or nothing is available.
        """
        available_subs = {}
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
        if automatic_captions and self.params.get('writeautomaticsub'):
            for lang, cap_info in automatic_captions.items():
                # normal subtitles take precedence over automatic captions
                if lang not in available_subs:
                    available_subs[lang] = cap_info

        if (not self.params.get('writesubtitles') and not
                self.params.get('writeautomaticsub') or not
                available_subs):
            return None

        all_sub_langs = available_subs.keys()
        if self.params.get('allsubtitles', False):
            requested_langs = all_sub_langs
        elif self.params.get('subtitleslangs', False):
            requested_langs = set()
            for lang in self.params.get('subtitleslangs'):
                if lang == 'all':
                    requested_langs.update(all_sub_langs)
                    continue
                # a leading '-' removes matching languages from the selection
                discard = lang[0] == '-'
                if discard:
                    lang = lang[1:]
                # the language spec is treated as a regex anchored at the end
                current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
                if discard:
                    for lang in current_langs:
                        requested_langs.discard(lang)
                else:
                    requested_langs.update(current_langs)
        elif 'en' in available_subs:
            requested_langs = ['en']
        else:
            requested_langs = [list(all_sub_langs)[0]]
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        subs = {}
        for lang in requested_langs:
            formats = available_subs.get(lang)
            if formats is None:
                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
                continue
            # pick the first preferred extension that is available
            for ext in formats_preference:
                if ext == 'best':
                    f = formats[-1]
                    break
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                if matches:
                    f = matches[-1]
                    break
            else:
                # no preferred extension matched - fall back to the last format
                f = formats[-1]
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs
2234
2235 def __forced_printings(self, info_dict, filename, incomplete):
2236 def print_mandatory(field, actual_field=None):
2237 if actual_field is None:
2238 actual_field = field
2239 if (self.params.get('force%s' % field, False)
2240 and (not incomplete or info_dict.get(actual_field) is not None)):
2241 self.to_stdout(info_dict[actual_field])
2242
2243 def print_optional(field):
2244 if (self.params.get('force%s' % field, False)
2245 and info_dict.get(field) is not None):
2246 self.to_stdout(info_dict[field])
2247
2248 info_dict = info_dict.copy()
2249 if filename is not None:
2250 info_dict['filename'] = filename
2251 if info_dict.get('requested_formats') is not None:
2252 # For RTMP URLs, also include the playpath
2253 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2254 elif 'url' in info_dict:
2255 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2256
2257 for tmpl in self.params.get('forceprint', []):
2258 if re.match(r'\w+$', tmpl):
2259 tmpl = '%({})s'.format(tmpl)
2260 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2261 self.to_stdout(tmpl % info_copy)
2262
2263 print_mandatory('title')
2264 print_mandatory('id')
2265 print_mandatory('url', 'urls')
2266 print_optional('thumbnail')
2267 print_optional('description')
2268 print_optional('filename')
2269 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2270 self.to_stdout(formatSeconds(info_dict['duration']))
2271 print_mandatory('format')
2272
2273 if self.params.get('forcejson', False):
2274 self.post_extract(info_dict)
2275 self.to_stdout(json.dumps(info_dict, default=repr))
2276
2277 def dl(self, name, info, subtitle=False, test=False):
2278
2279 if test:
2280 verbose = self.params.get('verbose')
2281 params = {
2282 'test': True,
2283 'quiet': not verbose,
2284 'verbose': verbose,
2285 'noprogress': not verbose,
2286 'nopart': True,
2287 'skip_unavailable_fragments': False,
2288 'keep_fragments': False,
2289 'overwrites': True,
2290 '_no_ytdl_file': True,
2291 }
2292 else:
2293 params = self.params
2294 fd = get_suitable_downloader(info, params)(self, params)
2295 if not test:
2296 for ph in self._progress_hooks:
2297 fd.add_progress_hook(ph)
2298 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2299 self.write_debug('Invoking downloader on "%s"' % urls)
2300 new_info = dict(info)
2301 if new_info.get('http_headers') is None:
2302 new_info['http_headers'] = self._calc_headers(new_info)
2303 return fd.download(name, new_info, subtitle)
2304
2305 def process_info(self, info_dict):
2306 """Process a single resolved IE result."""
2307
2308 assert info_dict.get('_type', 'video') == 'video'
2309
2310 info_dict.setdefault('__postprocessors', [])
2311
2312 max_downloads = self.params.get('max_downloads')
2313 if max_downloads is not None:
2314 if self._num_downloads >= int(max_downloads):
2315 raise MaxDownloadsReached()
2316
2317 # TODO: backward compatibility, to be removed
2318 info_dict['fulltitle'] = info_dict['title']
2319
2320 if 'format' not in info_dict:
2321 info_dict['format'] = info_dict['ext']
2322
2323 if self._match_entry(info_dict) is not None:
2324 return
2325
2326 self.post_extract(info_dict)
2327 self._num_downloads += 1
2328
2329 # info_dict['_filename'] needs to be set for backward compatibility
2330 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2331 temp_filename = self.prepare_filename(info_dict, 'temp')
2332 files_to_move = {}
2333
2334 # Forced printings
2335 self.__forced_printings(info_dict, full_filename, incomplete=False)
2336
2337 if self.params.get('simulate', False):
2338 if self.params.get('force_write_download_archive', False):
2339 self.record_download_archive(info_dict)
2340
2341 # Do nothing else if in simulate mode
2342 return
2343
2344 if full_filename is None:
2345 return
2346
2347 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2348 return
2349 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2350 return
2351
2352 if self.params.get('writedescription', False):
2353 descfn = self.prepare_filename(info_dict, 'description')
2354 if not self._ensure_dir_exists(encodeFilename(descfn)):
2355 return
2356 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2357 self.to_screen('[info] Video description is already present')
2358 elif info_dict.get('description') is None:
2359 self.report_warning('There\'s no description to write.')
2360 else:
2361 try:
2362 self.to_screen('[info] Writing video description to: ' + descfn)
2363 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2364 descfile.write(info_dict['description'])
2365 except (OSError, IOError):
2366 self.report_error('Cannot write description file ' + descfn)
2367 return
2368
2369 if self.params.get('writeannotations', False):
2370 annofn = self.prepare_filename(info_dict, 'annotation')
2371 if not self._ensure_dir_exists(encodeFilename(annofn)):
2372 return
2373 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2374 self.to_screen('[info] Video annotations are already present')
2375 elif not info_dict.get('annotations'):
2376 self.report_warning('There are no annotations to write.')
2377 else:
2378 try:
2379 self.to_screen('[info] Writing video annotations to: ' + annofn)
2380 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2381 annofile.write(info_dict['annotations'])
2382 except (KeyError, TypeError):
2383 self.report_warning('There are no annotations to write.')
2384 except (OSError, IOError):
2385 self.report_error('Cannot write annotations file: ' + annofn)
2386 return
2387
2388 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2389 self.params.get('writeautomaticsub')])
2390
2391 if subtitles_are_requested and info_dict.get('requested_subtitles'):
2392 # subtitles download errors are already managed as troubles in relevant IE
2393 # that way it will silently go on when used with unsupporting IE
2394 subtitles = info_dict['requested_subtitles']
2395 # ie = self.get_info_extractor(info_dict['extractor_key'])
2396 for sub_lang, sub_info in subtitles.items():
2397 sub_format = sub_info['ext']
2398 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2399 sub_filename_final = subtitles_filename(
2400 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2401 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2402 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2403 sub_info['filepath'] = sub_filename
2404 files_to_move[sub_filename] = sub_filename_final
2405 else:
2406 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2407 if sub_info.get('data') is not None:
2408 try:
2409 # Use newline='' to prevent conversion of newline characters
2410 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2411 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2412 subfile.write(sub_info['data'])
2413 sub_info['filepath'] = sub_filename
2414 files_to_move[sub_filename] = sub_filename_final
2415 except (OSError, IOError):
2416 self.report_error('Cannot write subtitles file ' + sub_filename)
2417 return
2418 else:
2419 try:
2420 self.dl(sub_filename, sub_info.copy(), subtitle=True)
2421 sub_info['filepath'] = sub_filename
2422 files_to_move[sub_filename] = sub_filename_final
2423 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2424 self.report_warning('Unable to download subtitle for "%s": %s' %
2425 (sub_lang, error_to_compat_str(err)))
2426 continue
2427
2428 if self.params.get('writeinfojson', False):
2429 infofn = self.prepare_filename(info_dict, 'infojson')
2430 if not self._ensure_dir_exists(encodeFilename(infofn)):
2431 return
2432 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2433 self.to_screen('[info] Video metadata is already present')
2434 else:
2435 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2436 try:
2437 write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2438 except (OSError, IOError):
2439 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2440 return
2441 info_dict['__infojson_filename'] = infofn
2442
2443 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2444 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2445 thumb_filename = replace_extension(
2446 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2447 files_to_move[thumb_filename_temp] = thumb_filename
2448
2449 # Write internet shortcut files
2450 url_link = webloc_link = desktop_link = False
2451 if self.params.get('writelink', False):
2452 if sys.platform == "darwin": # macOS.
2453 webloc_link = True
2454 elif sys.platform.startswith("linux"):
2455 desktop_link = True
2456 else: # if sys.platform in ['win32', 'cygwin']:
2457 url_link = True
2458 if self.params.get('writeurllink', False):
2459 url_link = True
2460 if self.params.get('writewebloclink', False):
2461 webloc_link = True
2462 if self.params.get('writedesktoplink', False):
2463 desktop_link = True
2464
2465 if url_link or webloc_link or desktop_link:
2466 if 'webpage_url' not in info_dict:
2467 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2468 return
2469 ascii_url = iri_to_uri(info_dict['webpage_url'])
2470
2471 def _write_link_file(extension, template, newline, embed_filename):
2472 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2473 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2474 self.to_screen('[info] Internet shortcut is already present')
2475 else:
2476 try:
2477 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2478 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2479 template_vars = {'url': ascii_url}
2480 if embed_filename:
2481 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2482 linkfile.write(template % template_vars)
2483 except (OSError, IOError):
2484 self.report_error('Cannot write internet shortcut ' + linkfn)
2485 return False
2486 return True
2487
2488 if url_link:
2489 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2490 return
2491 if webloc_link:
2492 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2493 return
2494 if desktop_link:
2495 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2496 return
2497
2498 try:
2499 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2500 except PostProcessingError as err:
2501 self.report_error('Preprocessing: %s' % str(err))
2502 return
2503
2504 must_record_download_archive = False
2505 if self.params.get('skip_download', False):
2506 info_dict['filepath'] = temp_filename
2507 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2508 info_dict['__files_to_move'] = files_to_move
2509 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2510 else:
2511 # Download
2512 try:
2513
                def existing_file(*filepaths):
                    """Return an already-downloaded file to reuse, or None.

                    Checks each candidate path (and, when a post-conversion
                    extension is configured, its converted counterpart). When
                    overwrites are enabled, all matches are deleted and None is
                    returned so the download proceeds from scratch.
                    """
                    ext = info_dict.get('ext')
                    final_ext = self.params.get('final_ext', ext)
                    existing_files = []
                    for file in orderedSet(filepaths):
                        if final_ext != ext:
                            # A postprocessor will convert to final_ext, so a
                            # converted file also counts as already downloaded
                            converted = replace_extension(file, final_ext, ext)
                            if os.path.exists(encodeFilename(converted)):
                                existing_files.append(converted)
                        if os.path.exists(encodeFilename(file)):
                            existing_files.append(file)

                    if not existing_files or self.params.get('overwrites', False):
                        for file in orderedSet(existing_files):
                            self.report_file_delete(file)
                            os.remove(encodeFilename(file))
                        return None

                    # Reuse the first match and correct the recorded extension
                    self.report_file_already_downloaded(existing_files[0])
                    info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                    return existing_files[0]
2535
2536 success = True
2537 if info_dict.get('requested_formats') is not None:
2538
2539 def compatible_formats(formats):
2540 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2541 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2542 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2543 if len(video_formats) > 2 or len(audio_formats) > 2:
2544 return False
2545
2546 # Check extension
2547 exts = set(format.get('ext') for format in formats)
2548 COMPATIBLE_EXTS = (
2549 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2550 set(('webm',)),
2551 )
2552 for ext_sets in COMPATIBLE_EXTS:
2553 if ext_sets.issuperset(exts):
2554 return True
2555 # TODO: Check acodec/vcodec
2556 return False
2557
2558 requested_formats = info_dict['requested_formats']
2559 old_ext = info_dict['ext']
2560 if self.params.get('merge_output_format') is None:
2561 if not compatible_formats(requested_formats):
2562 info_dict['ext'] = 'mkv'
2563 self.report_warning(
2564 'Requested formats are incompatible for merge and will be merged into mkv.')
2565 if (info_dict['ext'] == 'webm'
2566 and self.params.get('writethumbnail', False)
2567 and info_dict.get('thumbnails')):
2568 info_dict['ext'] = 'mkv'
2569 self.report_warning(
2570 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
2571
                    def correct_ext(filename):
                        # Replace the pre-merge extension (when it matches the
                        # original ext) with the final merged container's ext
                        filename_real_ext = os.path.splitext(filename)[1][1:]
                        filename_wo_ext = (
                            os.path.splitext(filename)[0]
                            if filename_real_ext == old_ext
                            else filename)
                        return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2579
2580 # Ensure filename always has a correct extension for successful merge
2581 full_filename = correct_ext(full_filename)
2582 temp_filename = correct_ext(temp_filename)
2583 dl_filename = existing_file(full_filename, temp_filename)
2584 info_dict['__real_download'] = False
2585
2586 _protocols = set(determine_protocol(f) for f in requested_formats)
2587 if len(_protocols) == 1:
2588 info_dict['protocol'] = _protocols.pop()
2589 directly_mergable = (
2590 'no-direct-merge' not in self.params.get('compat_opts', [])
2591 and info_dict.get('protocol') is not None # All requested formats have same protocol
2592 and not self.params.get('allow_unplayable_formats')
2593 and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2594 if directly_mergable:
2595 info_dict['url'] = requested_formats[0]['url']
2596 # Treat it as a single download
2597 dl_filename = existing_file(full_filename, temp_filename)
2598 if dl_filename is None:
2599 success, real_download = self.dl(temp_filename, info_dict)
2600 info_dict['__real_download'] = real_download
2601 else:
2602 downloaded = []
2603 merger = FFmpegMergerPP(self)
2604 if self.params.get('allow_unplayable_formats'):
2605 self.report_warning(
2606 'You have requested merging of multiple formats '
2607 'while also allowing unplayable formats to be downloaded. '
2608 'The formats won\'t be merged to prevent data corruption.')
2609 elif not merger.available:
2610 self.report_warning(
2611 'You have requested merging of multiple formats but ffmpeg is not installed. '
2612 'The formats won\'t be merged.')
2613
2614 if dl_filename is None:
2615 for f in requested_formats:
2616 new_info = dict(info_dict)
2617 del new_info['requested_formats']
2618 new_info.update(f)
2619 fname = prepend_extension(
2620 self.prepare_filename(new_info, 'temp'),
2621 'f%s' % f['format_id'], new_info['ext'])
2622 if not self._ensure_dir_exists(fname):
2623 return
2624 downloaded.append(fname)
2625 partial_success, real_download = self.dl(fname, new_info)
2626 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2627 success = success and partial_success
2628 if merger.available and not self.params.get('allow_unplayable_formats'):
2629 info_dict['__postprocessors'].append(merger)
2630 info_dict['__files_to_merge'] = downloaded
2631 # Even if there were no downloads, it is being merged only now
2632 info_dict['__real_download'] = True
2633 else:
2634 for file in downloaded:
2635 files_to_move[file] = None
2636 else:
2637 # Just a single file
2638 dl_filename = existing_file(full_filename, temp_filename)
2639 if dl_filename is None:
2640 success, real_download = self.dl(temp_filename, info_dict)
2641 info_dict['__real_download'] = real_download
2642
2643 dl_filename = dl_filename or temp_filename
2644 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2645
2646 except network_exceptions as err:
2647 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2648 return
2649 except (OSError, IOError) as err:
2650 raise UnavailableVideoError(err)
2651 except (ContentTooShortError, ) as err:
2652 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2653 return
2654
2655 if success and full_filename != '-':
2656 # Fixup content
2657 fixup_policy = self.params.get('fixup')
2658 if fixup_policy is None:
2659 fixup_policy = 'detect_or_warn'
2660
2661 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2662
2663 stretched_ratio = info_dict.get('stretched_ratio')
2664 if stretched_ratio is not None and stretched_ratio != 1:
2665 if fixup_policy == 'warn':
2666 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2667 info_dict['id'], stretched_ratio))
2668 elif fixup_policy == 'detect_or_warn':
2669 stretched_pp = FFmpegFixupStretchedPP(self)
2670 if stretched_pp.available:
2671 info_dict['__postprocessors'].append(stretched_pp)
2672 else:
2673 self.report_warning(
2674 '%s: Non-uniform pixel ratio (%s). %s'
2675 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2676 else:
2677 assert fixup_policy in ('ignore', 'never')
2678
2679 if (info_dict.get('requested_formats') is None
2680 and info_dict.get('container') == 'm4a_dash'
2681 and info_dict.get('ext') == 'm4a'):
2682 if fixup_policy == 'warn':
2683 self.report_warning(
2684 '%s: writing DASH m4a. '
2685 'Only some players support this container.'
2686 % info_dict['id'])
2687 elif fixup_policy == 'detect_or_warn':
2688 fixup_pp = FFmpegFixupM4aPP(self)
2689 if fixup_pp.available:
2690 info_dict['__postprocessors'].append(fixup_pp)
2691 else:
2692 self.report_warning(
2693 '%s: writing DASH m4a. '
2694 'Only some players support this container. %s'
2695 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2696 else:
2697 assert fixup_policy in ('ignore', 'never')
2698
2699 if ('protocol' in info_dict
2700 and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
2701 if fixup_policy == 'warn':
2702 self.report_warning('%s: malformed AAC bitstream detected.' % (
2703 info_dict['id']))
2704 elif fixup_policy == 'detect_or_warn':
2705 fixup_pp = FFmpegFixupM3u8PP(self)
2706 if fixup_pp.available:
2707 info_dict['__postprocessors'].append(fixup_pp)
2708 else:
2709 self.report_warning(
2710 '%s: malformed AAC bitstream detected. %s'
2711 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2712 else:
2713 assert fixup_policy in ('ignore', 'never')
2714
2715 try:
2716 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2717 except PostProcessingError as err:
2718 self.report_error('Postprocessing: %s' % str(err))
2719 return
2720 try:
2721 for ph in self._post_hooks:
2722 ph(info_dict['filepath'])
2723 except Exception as err:
2724 self.report_error('post hooks: %s' % str(err))
2725 return
2726 must_record_download_archive = True
2727
2728 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2729 self.record_download_archive(info_dict)
2730 max_downloads = self.params.get('max_downloads')
2731 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2732 raise MaxDownloadsReached()
2733
2734 def download(self, url_list):
2735 """Download a given list of URLs."""
2736 outtmpl = self.outtmpl_dict['default']
2737 if (len(url_list) > 1
2738 and outtmpl != '-'
2739 and '%' not in outtmpl
2740 and self.params.get('max_downloads') != 1):
2741 raise SameFileError(outtmpl)
2742
2743 for url in url_list:
2744 try:
2745 # It also downloads the videos
2746 res = self.extract_info(
2747 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2748 except UnavailableVideoError:
2749 self.report_error('unable to download video')
2750 except MaxDownloadsReached:
2751 self.to_screen('[info] Maximum number of downloaded files reached')
2752 raise
2753 except ExistingVideoReached:
2754 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2755 raise
2756 except RejectedVideoReached:
2757 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2758 raise
2759 else:
2760 if self.params.get('dump_single_json', False):
2761 self.post_extract(res)
2762 self.to_stdout(json.dumps(res, default=repr))
2763
2764 return self._download_retcode
2765
    def download_with_info_file(self, info_filename):
        """(Re-)download using a previously written info JSON file.

        Falls back to a fresh extraction from the stored 'webpage_url' when
        processing the saved info fails. Returns the download return code.
        """
        with contextlib.closing(fileinput.FileInput(
                [info_filename], mode='r',
                openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, we can't call json.load
            info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
        try:
            self.process_ie_result(info, download=True)
        except (DownloadError, EntryNotInPlaylist):
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                # The stored info may be stale; retry from the original URL
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
            else:
                raise
        return self._download_retcode
2782
2783 @staticmethod
2784 def filter_requested_info(info_dict, actually_filter=True):
2785 remove_keys = ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict
2786 keep_keys = ['_type'], # Always keep this to facilitate load-info-json
2787 if actually_filter:
2788 remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2789 empty_values = (None, {}, [], set(), tuple())
2790 reject = lambda k, v: k not in keep_keys and (
2791 k.startswith('_') or k in remove_keys or v in empty_values)
2792 else:
2793 info_dict['epoch'] = int(time.time())
2794 reject = lambda k, v: k in remove_keys
2795 filter_fn = lambda obj: (
2796 list(map(filter_fn, obj)) if isinstance(obj, (list, tuple, set))
2797 else obj if not isinstance(obj, dict)
2798 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2799 return filter_fn(info_dict)
2800
2801 def run_pp(self, pp, infodict):
2802 files_to_delete = []
2803 if '__files_to_move' not in infodict:
2804 infodict['__files_to_move'] = {}
2805 files_to_delete, infodict = pp.run(infodict)
2806 if not files_to_delete:
2807 return infodict
2808
2809 if self.params.get('keepvideo', False):
2810 for f in files_to_delete:
2811 infodict['__files_to_move'].setdefault(f, '')
2812 else:
2813 for old_filename in set(files_to_delete):
2814 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2815 try:
2816 os.remove(encodeFilename(old_filename))
2817 except (IOError, OSError):
2818 self.report_warning('Unable to remove downloaded original file')
2819 if old_filename in infodict['__files_to_move']:
2820 del infodict['__files_to_move'][old_filename]
2821 return infodict
2822
2823 @staticmethod
2824 def post_extract(info_dict):
2825 def actual_post_extract(info_dict):
2826 if info_dict.get('_type') in ('playlist', 'multi_video'):
2827 for video_dict in info_dict.get('entries', {}):
2828 actual_post_extract(video_dict or {})
2829 return
2830
2831 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2832 extra = post_extractor().items()
2833 info_dict.update(extra)
2834 info_dict.pop('__post_extractor', None)
2835
2836 original_infodict = info_dict.get('__original_infodict') or {}
2837 original_infodict.update(extra)
2838 original_infodict.pop('__post_extractor', None)
2839
2840 actual_post_extract(info_dict or {})
2841
2842 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2843 info = dict(ie_info)
2844 info['__files_to_move'] = files_to_move or {}
2845 for pp in self._pps[key]:
2846 info = self.run_pp(pp, info)
2847 return info, info.pop('__files_to_move', None)
2848
2849 def post_process(self, filename, ie_info, files_to_move=None):
2850 """Run all the postprocessors on the given file."""
2851 info = dict(ie_info)
2852 info['filepath'] = filename
2853 info['__files_to_move'] = files_to_move or {}
2854
2855 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2856 info = self.run_pp(pp, info)
2857 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2858 del info['__files_to_move']
2859 for pp in self._pps['after_move']:
2860 info = self.run_pp(pp, info)
2861 return info
2862
2863 def _make_archive_id(self, info_dict):
2864 video_id = info_dict.get('id')
2865 if not video_id:
2866 return
2867 # Future-proof against any change in case
2868 # and backwards compatibility with prior versions
2869 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2870 if extractor is None:
2871 url = str_or_none(info_dict.get('url'))
2872 if not url:
2873 return
2874 # Try to find matching extractor for the URL and take its ie_key
2875 for ie in self._ies:
2876 if ie.suitable(url):
2877 extractor = ie.ie_key()
2878 break
2879 else:
2880 return
2881 return '%s %s' % (extractor.lower(), video_id)
2882
2883 def in_download_archive(self, info_dict):
2884 fn = self.params.get('download_archive')
2885 if fn is None:
2886 return False
2887
2888 vid_id = self._make_archive_id(info_dict)
2889 if not vid_id:
2890 return False # Incomplete video information
2891
2892 return vid_id in self.archive
2893
    def record_download_archive(self, info_dict):
        """Append the video's archive id to the archive file (if configured)."""
        fn = self.params.get('download_archive')
        if fn is None:
            return
        vid_id = self._make_archive_id(info_dict)
        assert vid_id
        # Locked append so concurrent processes don't interleave writes
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
        # Keep the in-memory archive in sync with the file
        self.archive.add(vid_id)
2903
2904 @staticmethod
2905 def format_resolution(format, default='unknown'):
2906 if format.get('vcodec') == 'none':
2907 return 'audio only'
2908 if format.get('resolution') is not None:
2909 return format['resolution']
2910 if format.get('width') and format.get('height'):
2911 res = '%dx%d' % (format['width'], format['height'])
2912 elif format.get('height'):
2913 res = '%sp' % format['height']
2914 elif format.get('width'):
2915 res = '%dx?' % format['width']
2916 else:
2917 res = default
2918 return res
2919
    def _format_note(self, fdict):
        """Build the 'note' column (old-style format listing) from a format dict.

        Appends fragments in a fixed order, inserting separators only when
        something has already been written.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' joins the codec name with the video bitrate appended below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # No codec name known, but both bitrates present: label the video one
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
2975
2976 def _format_note_table(self, f):
2977 def join_fields(*vargs):
2978 return ', '.join((val for val in vargs if val != ''))
2979
2980 return join_fields(
2981 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2982 format_field(f, 'language', '[%s]'),
2983 format_field(f, 'format_note'),
2984 format_field(f, 'container', ignore=(None, f.get('ext'))),
2985 format_field(f, 'asr', '%5dHz'))
2986
    def list_formats(self, info_dict):
        """Print a table of the available formats for a video."""
        formats = info_dict.get('formats', [info_dict])
        # The old (youtube-dl style) layout is used only when requested via
        # compat_opts or list_formats_as_table=False
        new_format = (
            'list-formats' not in self.params.get('compat_opts', [])
            and self.params.get('list_formats_as_table', True) is not False)
        if new_format:
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    format_field(f, 'fps', '%d'),
                    '|',
                    format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                    format_field(f, 'tbr', '%4dk'),
                    shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                    '|',
                    format_field(f, 'vcodec', default='unknown').replace('none', ''),
                    format_field(f, 'vbr', '%4dk'),
                    format_field(f, 'acodec', default='unknown').replace('none', ''),
                    format_field(f, 'abr', '%3dk'),
                    format_field(f, 'asr', '%5dHz'),
                    self._format_note_table(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                           '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
        else:
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        self.to_screen(
            '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
                header_line,
                table,
                delim=new_format,
                extraGap=(0 if new_format else 1),
                hideEmpty=new_format)))
3032
3033 def list_thumbnails(self, info_dict):
3034 thumbnails = info_dict.get('thumbnails')
3035 if not thumbnails:
3036 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3037 return
3038
3039 self.to_screen(
3040 '[info] Thumbnails for %s:' % info_dict['id'])
3041 self.to_screen(render_table(
3042 ['ID', 'width', 'height', 'URL'],
3043 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3044
    def list_subtitles(self, video_id, subtitles, name='subtitles'):
        """Print a table of available subtitle (or caption) tracks."""
        if not subtitles:
            self.to_screen('%s has no %s' % (video_id, name))
            return
        self.to_screen(
            'Available %s for %s:' % (name, video_id))

        def _row(lang, formats):
            # Formats are shown in reverse list order
            # (NOTE(review): presumably worst-to-best in the source list — confirm)
            exts, names = zip(*((f['ext'], f.get('name', 'unknown')) for f in reversed(formats)))
            if len(set(names)) == 1:
                # Collapse the name column when every entry shares one name
                names = [] if names[0] == 'unknown' else names[:1]
            return [lang, ', '.join(names), ', '.join(exts)]

        self.to_screen(render_table(
            ['Language', 'Name', 'Formats'],
            [_row(lang, formats) for lang, formats in subtitles.items()],
            hideEmpty=True))
3062
3063 def urlopen(self, req):
3064 """ Start an HTTP download """
3065 if isinstance(req, compat_basestring):
3066 req = sanitized_Request(req)
3067 return self._opener.open(req, timeout=self._socket_timeout)
3068
    def print_debug_header(self):
        """Print verbose debugging information (versions, encodings, proxies)."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        # Detect how yt-dlp was packaged/launched
        source = (
            '(exe)' if hasattr(sys, 'frozen')
            else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
            else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
            else '')
        self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        if self.params.get('compat_opts'):
            self._write_string(
                '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
        # Best-effort: report the git commit when running from a checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            # sys.exc_clear only exists on Python 2; ignore its absence
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the PyPy version triple when running under PyPy
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            return
            # NOTE: unreachable — the early return above deliberately disables
            # the upstream youtube-dl update check below
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3158
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, TLS, redirects) used by urlopen."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout of 10 minutes
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookies only
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty --proxy disables all proxying
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
3211
3212 def encode(self, s):
3213 if isinstance(s, bytes):
3214 return s # Already encoded
3215
3216 try:
3217 return s.encode(self.get_encoding())
3218 except UnicodeEncodeError as err:
3219 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3220 raise
3221
3222 def get_encoding(self):
3223 encoding = self.params.get('encoding')
3224 if encoding is None:
3225 encoding = preferredencoding()
3226 return encoding
3227
    def _write_thumbnails(self, info_dict, filename):  # return the extensions
        """Download thumbnail(s) next to *filename*; return the written extensions."""
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails = []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
        # Only suffix filenames with the thumbnail id when several are written
        multiple = write_all and len(thumbnails) > 1

        ret = []
        # When writing a single thumbnail, iterate in reverse so the loop can
        # stop at the first success (NOTE(review): presumably the list is
        # ordered worst-to-best — confirm against the extractor convention)
        for t in thumbnails[::1 if write_all else -1]:
            thumb_ext = determine_ext(t['url'], 'jpg')
            suffix = '%s.' % t['id'] if multiple else ''
            thumb_display_id = '%s ' % t['id'] if multiple else ''
            thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                ret.append(suffix + thumb_ext)
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append(suffix + thumb_ext)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    # Thumbnails are best-effort: warn and move on
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))
            if ret and not write_all:
                break
        return ret