yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30 from zipimport import zipimporter
  31
  32 from .compat import (
  33     compat_basestring,
  34     compat_cookiejar,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_str,
  40     compat_tokenize_tokenize,
  41     compat_urllib_error,
  42     compat_urllib_request,
  43     compat_urllib_request_DataHandler,
  44 )
  45 from .utils import (
  46     age_restricted,
  47     args_to_str,
  48     ContentTooShortError,
  49     date_from_str,
  50     DateRange,
  51     DEFAULT_OUTTMPL,
  52     determine_ext,
  53     determine_protocol,
  54     DOT_DESKTOP_LINK_TEMPLATE,
  55     DOT_URL_LINK_TEMPLATE,
  56     DOT_WEBLOC_LINK_TEMPLATE,
  57     DownloadError,
  58     encode_compat_str,
  59     encodeFilename,
  60     EntryNotInPlaylist,
  61     error_to_compat_str,
  62     ExistingVideoReached,
  63     expand_path,
  64     ExtractorError,
  65     float_or_none,
  66     format_bytes,
  67     format_field,
  68     STR_FORMAT_RE,
  69     formatSeconds,
  70     GeoRestrictedError,
  71     HEADRequest,
  72     int_or_none,
  73     iri_to_uri,
  74     ISO3166Utils,
  75     LazyList,
  76     locked_file,
  77     make_dir,
  78     make_HTTPS_handler,
  79     MaxDownloadsReached,
  80     network_exceptions,
  81     orderedSet,
  82     OUTTMPL_TYPES,
  83     PagedList,
  84     parse_filesize,
  85     PerRequestProxyHandler,
  86     platform_name,
  87     PostProcessingError,
  88     preferredencoding,
  89     prepend_extension,
  90     process_communicate_or_kill,
  91     register_socks_protocols,
  92     RejectedVideoReached,
  93     render_table,
  94     replace_extension,
  95     SameFileError,
  96     sanitize_filename,
  97     sanitize_path,
  98     sanitize_url,
  99     sanitized_Request,
 100     std_headers,
 101     str_or_none,
 102     strftime_or_none,
 103     subtitles_filename,
 104     ThrottledDownload,
 105     to_high_limit_path,
 106     traverse_obj,
 107     try_get,
 108     UnavailableVideoError,
 109     url_basename,
 110     version_tuple,
 111     write_json_file,
 112     write_string,
 113     YoutubeDLCookieJar,
 114     YoutubeDLCookieProcessor,
 115     YoutubeDLHandler,
 116     YoutubeDLRedirectHandler,
 117 )
 118 from .cache import Cache
 119 from .extractor import (
 120     gen_extractor_classes,
 121     get_info_extractor,
 122     _LAZY_LOADER,
 123     _PLUGIN_CLASSES
 124 )
 125 from .extractor.openload import PhantomJSwrapper
 126 from .downloader import (
 127     get_suitable_downloader,
 128     shorten_protocol_name
 129 )
 130 from .downloader.rtmp import rtmpdump_version
 131 from .postprocessor import (
 132     get_postprocessor,
 133     FFmpegFixupDurationPP,
 134     FFmpegFixupM3u8PP,
 135     FFmpegFixupM4aPP,
 136     FFmpegFixupStretchedPP,
 137     FFmpegFixupTimestampPP,
 138     FFmpegMergerPP,
 139     FFmpegPostProcessor,
 140     MoveFilesAfterDownloadPP,
 141 )
 142 from .version import __version__
 143
 144 if compat_os_name == 'nt':
 145     import ctypes
 146
 147
 148 class YoutubeDL(object):
 149     """YoutubeDL class.
 150
    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Given a video URL, the downloader does not itself know how
    to extract all the needed information; that is the job of the
    InfoExtractors, so it has to pass the URL to one of them.
 157
 158     For this, YoutubeDL objects have a method that allows
 159     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
 164     Downloader to download the video.
 165
 166     YoutubeDL objects accept a lot of parameters. In order not to saturate
 167     the object constructor with arguments, it receives a dictionary of
 168     options instead. These options are available through the params
 169     attribute for the InfoExtractors to use. The YoutubeDL also
 170     registers itself as the downloader in charge for the InfoExtractors
 171     that are added to it, so this is a "mutual registration".
 172
 173     Available options:
 174
 175     username:          Username for authentication purposes.
 176     password:          Password for authentication purposes.
 177     videopassword:     Password for accessing a video.
 178     ap_mso:            Adobe Pass multiple-system operator identifier.
 179     ap_username:       Multiple-system operator account username.
 180     ap_password:       Multiple-system operator account password.
 181     usenetrc:          Use netrc for authentication instead.
 182     verbose:           Print additional info to stdout.
 183     quiet:             Do not print messages to stdout.
 184     no_warnings:       Do not print out anything for warnings.
 185     forceprint:        A list of templates to force print
 186     forceurl:          Force printing final URL. (Deprecated)
 187     forcetitle:        Force printing title. (Deprecated)
 188     forceid:           Force printing ID. (Deprecated)
 189     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 190     forcedescription:  Force printing description. (Deprecated)
 191     forcefilename:     Force printing final filename. (Deprecated)
 192     forceduration:     Force printing duration. (Deprecated)
 193     forcejson:         Force printing info_dict as JSON.
 194     dump_single_json:  Force printing the info_dict of the whole playlist
 195                        (or video) as a single JSON line.
 196     force_write_download_archive: Force writing download archive regardless
 197                        of 'skip_download' or 'simulate'.
 198     simulate:          Do not download the video files.
 199     format:            Video format code. see "FORMAT SELECTION" for more details.
 200     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
 202                        extracting metadata even if the video is not actually
 203                        available for download (experimental)
 204     format_sort:       How to sort the video formats. see "Sorting Formats"
 205                        for more details.
 206     format_sort_force: Force the given format_sort. see "Sorting Formats"
 207                        for more details.
 208     allow_multiple_video_streams:   Allow multiple video streams to be merged
 209                        into a single file
 210     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 211                        into a single file
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 214     outtmpl:           Dictionary of templates for output names. Allowed keys
 215                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       A string is also accepted for backward compatibility
 217     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 218     restrictfilenames: Do not allow "&" and spaces in file names
 219     trim_file_name:    Limit length of filename (extension excluded)
 220     windowsfilenames:  Force the filenames to be windows compatible
 221     ignoreerrors:      Do not stop on download errors
 222                        (Default True when running yt-dlp,
 223                        but False when directly accessing YoutubeDL class)
 224     skip_playlist_after_errors: Number of allowed failures until the rest of
 225                        the playlist is skipped
 226     force_generic_extractor: Force downloader to use the generic extractor
 227     overwrites:        Overwrite all video and metadata files if True,
 228                        overwrite only non-video files if None
 229                        and don't overwrite any file if False
 230     playliststart:     Playlist item to start at.
 231     playlistend:       Playlist item to end at.
 232     playlist_items:    Specific indices of playlist to download.
 233     playlistreverse:   Download playlist items in reverse order.
 234     playlistrandom:    Download playlist items in random order.
 235     matchtitle:        Download only matching titles.
 236     rejecttitle:       Reject downloads for matching titles.
 237     logger:            Log messages to a logging.Logger instance.
 238     logtostderr:       Log messages to stderr instead of stdout.
 239     writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
 241     clean_infojson:    Remove private fields from the infojson
 242     writecomments:     Extract video comments. This will not be written to disk
 243                        unless writeinfojson is also given
 244     writeannotations:  Write the video annotations to a .annotations.xml file
 245     writethumbnail:    Write the thumbnail image to a file
 246     allow_playlist_files: Whether to write playlists' description, infojson etc
 247                        also to disk when using the 'write*' options
 248     write_all_thumbnails:  Write all thumbnail formats to files
 249     writelink:         Write an internet shortcut file, depending on the
 250                        current platform (.url/.webloc/.desktop)
 251     writeurllink:      Write a Windows internet shortcut file (.url)
 252     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 253     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 254     writesubtitles:    Write the video subtitles to a file
 255     writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 257                        Downloads all the subtitles of the video
 258                        (requires writesubtitles or writeautomaticsub)
 259     listsubtitles:     Lists all available subtitles for the video
 260     subtitlesformat:   The format code for subtitles
 261     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 262                        The list may contain "all" to refer to all the available
 263                        subtitles. The language can be prefixed with a "-" to
 264                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 265     keepvideo:         Keep the video file after post-processing
 266     daterange:         A DateRange object, download only if the upload_date is in the range.
 267     skip_download:     Skip the actual download of the video file
 268     cachedir:          Location of the cache files in the filesystem.
 269                        False to disable filesystem cache.
 270     noplaylist:        Download single video instead of a playlist if in doubt.
 271     age_limit:         An integer representing the user's age in years.
 272                        Unsuitable videos for the given age are skipped.
 273     min_views:         An integer representing the minimum view count the video
 274                        must have in order to not be skipped.
 275                        Videos without view count information are always
 276                        downloaded. None for no limit.
 277     max_views:         An integer representing the maximum view count.
 278                        Videos that are more popular than that are not
 279                        downloaded.
 280                        Videos without view count information are always
 281                        downloaded. None for no limit.
 282     download_archive:  File name of a file where all downloads are recorded.
 283                        Videos already present in the file are not downloaded
 284                        again.
 285     break_on_existing: Stop the download process after attempting to download a
 286                        file that is in the archive.
 287     break_on_reject:   Stop the download process when encountering a video that
 288                        has been filtered out.
 289     cookiefile:        File name where cookies should be read from and dumped to
 290     nocheckcertificate:Do not verify SSL certificates
 291     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 292                        At the moment, this is only supported by YouTube.
 293     proxy:             URL of the proxy server to use
 294     geo_verification_proxy:  URL of the proxy to use for IP address verification
 295                        on geo-restricted sites.
 296     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 297     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
 299     debug_printtraffic:Print out sent and received HTTP traffic
 300     include_ads:       Download ads as well
 301     default_search:    Prepend this string if an input url is not valid.
 302                        'auto' for elaborate guessing
 303     encoding:          Use this encoding instead of the system-specified.
 304     extract_flat:      Do not resolve URLs, return the immediate result.
 305                        Pass in 'in_playlist' to only show this behavior for
 306                        playlist items.
 307     postprocessors:    A list of dictionaries, each with an entry
 308                        * key:  The name of the postprocessor. See
 309                                yt_dlp/postprocessor/__init__.py for a list.
 310                        * when: When to run the postprocessor. Can be one of
 311                                pre_process|before_dl|post_process|after_move.
 312                                Assumed to be 'post_process' if not given
 313     post_hooks:        A list of functions that get called as the final step
 314                        for each video file, after all postprocessors have been
 315                        called. The filename will be passed as the only argument.
 316     progress_hooks:    A list of functions that get called on download
 317                        progress, with a dictionary with the entries
 318                        * status: One of "downloading", "error", or "finished".
 319                                  Check this first and ignore unknown values.
 320
 321                        If status is one of "downloading", or "finished", the
 322                        following properties may also be present:
 323                        * filename: The final filename (always present)
 324                        * tmpfilename: The filename we're currently writing to
 325                        * downloaded_bytes: Bytes on disk
 326                        * total_bytes: Size of the whole file, None if unknown
 327                        * total_bytes_estimate: Guess of the eventual file size,
 328                                                None if unavailable.
 329                        * elapsed: The number of seconds since download started.
 330                        * eta: The estimated time in seconds, None if unknown
 331                        * speed: The download speed in bytes/second, None if
 332                                 unknown
 333                        * fragment_index: The counter of the currently
 334                                          downloaded video fragment.
 335                        * fragment_count: The number of fragments (= individual
 336                                          files that will be merged)
 337
 338                        Progress hooks are guaranteed to be called at least once
 339                        (with status "finished") if the download is successful.
 340     merge_output_format: Extension to use when merging formats.
 341     final_ext:         Expected final extension; used to detect when the file was
 342                        already downloaded and converted. "merge_output_format" is
 343                        replaced by this extension when given
 344     fixup:             Automatically correct known faults of the file.
 345                        One of:
 346                        - "never": do nothing
 347                        - "warn": only emit a warning
 348                        - "detect_or_warn": check whether we can do anything
 349                                            about it, warn otherwise (default)
 350     source_address:    Client-side IP address to bind to.
 351     call_home:         Boolean, true iff we are allowed to contact the
 352                        yt-dlp servers for debugging. (BROKEN)
 353     sleep_interval_requests: Number of seconds to sleep between requests
 354                        during extraction
 355     sleep_interval:    Number of seconds to sleep before each download when
 356                        used alone or a lower bound of a range for randomized
 357                        sleep before each download (minimum possible number
 358                        of seconds to sleep) when used along with
 359                        max_sleep_interval.
 360     max_sleep_interval:Upper bound of a range for randomized sleep before each
 361                        download (maximum possible number of seconds to sleep).
 362                        Must only be used along with sleep_interval.
 363                        Actual sleep time will be a random float from range
 364                        [sleep_interval; max_sleep_interval].
 365     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 366     listformats:       Print an overview of available video formats and exit.
 367     list_thumbnails:   Print a table of all thumbnails and exit.
 368     match_filter:      A function that gets called with the info_dict of
 369                        every video.
 370                        If it returns a message, the video is ignored.
 371                        If it returns None, the video is downloaded.
 372                        match_filter_func in utils.py is one example for this.
 373     no_color:          Do not emit color codes in output.
 374     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 375                        HTTP header
 376     geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be used for
 378                        explicit geographic restriction bypassing via faking
 379                        X-Forwarded-For HTTP header
 380     geo_bypass_ip_block:
 381                        IP range in CIDR notation that will be used similarly to
 382                        geo_bypass_country
 383
 384     The following options determine which downloader is picked:
 385     external_downloader: A dictionary of protocol keys and the executable of the
 386                        external downloader to use for it. The allowed protocols
 387                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 388                        Set the value to 'native' to use the native downloader
 389     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 390                        or {'m3u8': 'ffmpeg'} instead.
 391                        Use the native HLS downloader instead of ffmpeg/avconv
 392                        if True, otherwise use ffmpeg/avconv if False, otherwise
 393                        use downloader suggested by extractor if None.
 394     compat_opts:       Compatibility options. See "Differences in default behavior".
 395                        Note that only format-sort, format-spec, no-live-chat,
 396                        no-attach-info-json, playlist-index, list-formats,
 397                        no-direct-merge, embed-thumbnail-atomicparsley,
                       no-youtube-unavailable-videos, no-youtube-channel-redirect
                       work when used via the API
 400
 401     The following parameters are not used by YoutubeDL itself, they are used by
 402     the downloader (see yt_dlp/downloader/common.py):
 403     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 404     max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
 405     xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
 406
 407     The following options are used by the post processors:
 408     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 409                        otherwise prefer ffmpeg. (avconv support is deprecated)
 410     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 411                        to the binary or its containing directory.
 412     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 413                         and a list of additional command-line arguments for the
 414                         postprocessor/executable. The dict can also have "PP+EXE" keys
 415                         which are used when the given exe is used by the given PP.
                        Use 'default' as the name for arguments to be passed to all PPs
 417
 418     The following options are used by the extractors:
 419     extractor_retries: Number of times to retry for known errors
 420     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 421     hls_split_discontinuity: Split HLS playlists to different formats at
 422                        discontinuities such as ad breaks (default: False)
 423     extractor_args:    A dictionary of arguments to be passed to the extractors.
 424                        See "EXTRACTOR ARGUMENTS" for details.
 425                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 426     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 427                        If True (default), DASH manifests and related
 428                        data will be downloaded and processed by extractor.
 429                        You can reduce network I/O by disabling it if you don't
 430                        care about DASH. (only for youtube)
 431     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 432                        If True (default), HLS manifests and related
 433                        data will be downloaded and processed by extractor.
 434                        You can reduce network I/O by disabling it if you don't
 435                        care about HLS. (only for youtube)
 436     """
 437
 438     _NUMERIC_FIELDS = set((
 439         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 440         'timestamp', 'upload_year', 'upload_month', 'upload_day',
 441         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 442         'average_rating', 'comment_count', 'age_limit',
 443         'start_time', 'end_time',
 444         'chapter_number', 'season_number', 'episode_number',
 445         'track_number', 'disc_number', 'release_year',
 446         'playlist_index',
 447     ))
 448
 449     params = None
 450     _ies = []
 451     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 452     __prepare_filename_warned = False
 453     _first_webpage_request = True
 454     _download_retcode = None
 455     _num_downloads = None
 456     _playlist_level = 0
 457     _playlist_urls = set()
 458     _screen_file = None
 459
 460     def __init__(self, params=None, auto_init=True):
 461         """Create a FileDownloader object with the given options."""
 462         if params is None:
 463             params = {}
 464         self._ies = []
 465         self._ies_instances = {}
 466         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 467         self.__prepare_filename_warned = False
 468         self._first_webpage_request = True
 469         self._post_hooks = []
 470         self._progress_hooks = []
 471         self._download_retcode = 0
 472         self._num_downloads = 0
 473         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 474         self._err_file = sys.stderr
 475         self.params = {
 476             # Default parameters
 477             'nocheckcertificate': False,
 478         }
 479         self.params.update(params)
 480         self.cache = Cache(self)
 481
 482         if sys.version_info < (3, 6):
 483             self.report_warning(
 484                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 485
 486         def check_deprecated(param, option, suggestion):
 487             if self.params.get(param) is not None:
 488                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 489                 return True
 490             return False
 491
 492         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 493             if self.params.get('geo_verification_proxy') is None:
 494                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 495
 496         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 497         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 498         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 499
 500         for msg in self.params.get('warnings', []):
 501             self.report_warning(msg)
 502
 503         if self.params.get('final_ext'):
 504             if self.params.get('merge_output_format'):
 505                 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
 506             self.params['merge_output_format'] = self.params['final_ext']
 507
 508         if 'overwrites' in self.params and self.params['overwrites'] is None:
 509             del self.params['overwrites']
 510
 511         if params.get('bidi_workaround', False):
 512             try:
 513                 import pty
 514                 master, slave = pty.openpty()
 515                 width = compat_get_terminal_size().columns
 516                 if width is None:
 517                     width_args = []
 518                 else:
 519                     width_args = ['-w', str(width)]
 520                 sp_kwargs = dict(
 521                     stdin=subprocess.PIPE,
 522                     stdout=slave,
 523                     stderr=self._err_file)
 524                 try:
 525                     self._output_process = subprocess.Popen(
 526                         ['bidiv'] + width_args, **sp_kwargs
 527                     )
 528                 except OSError:
 529                     self._output_process = subprocess.Popen(
 530                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 531                 self._output_channel = os.fdopen(master, 'rb')
 532             except OSError as ose:
 533                 if ose.errno == errno.ENOENT:
 534                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 535                 else:
 536                     raise
 537
 538         if (sys.platform != 'win32'
 539                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 540                 and not params.get('restrictfilenames', False)):
 541             # Unicode filesystem API will throw errors (#1474, #13027)
 542             self.report_warning(
 543                 'Assuming --restrict-filenames since file system encoding '
 544                 'cannot encode all characters. '
 545                 'Set the LC_ALL environment variable to fix this.')
 546             self.params['restrictfilenames'] = True
 547
 548         self.outtmpl_dict = self.parse_outtmpl()
 549
 550         # Creating format selector here allows us to catch syntax errors before the extraction
 551         self.format_selector = (
 552             None if self.params.get('format') is None
 553             else self.build_format_selector(self.params['format']))
 554
 555         self._setup_opener()
 556
 557         """Preload the archive, if any is specified"""
 558         def preload_download_archive(fn):
 559             if fn is None:
 560                 return False
 561             self.write_debug('Loading archive file %r\n' % fn)
 562             try:
 563                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 564                     for line in archive_file:
 565                         self.archive.add(line.strip())
 566             except IOError as ioe:
 567                 if ioe.errno != errno.ENOENT:
 568                     raise
 569                 return False
 570             return True
 571
 572         self.archive = set()
 573         preload_download_archive(self.params.get('download_archive'))
 574
 575         if auto_init:
 576             self.print_debug_header()
 577             self.add_default_info_extractors()
 578
 579         for pp_def_raw in self.params.get('postprocessors', []):
 580             pp_def = dict(pp_def_raw)
 581             when = pp_def.pop('when', 'post_process')
 582             pp_class = get_postprocessor(pp_def.pop('key'))
 583             pp = pp_class(self, **compat_kwargs(pp_def))
 584             self.add_post_processor(pp, when=when)
 585
 586         for ph in self.params.get('post_hooks', []):
 587             self.add_post_hook(ph)
 588
 589         for ph in self.params.get('progress_hooks', []):
 590             self.add_progress_hook(ph)
 591
 592         register_socks_protocols()
 593
 594     def warn_if_short_id(self, argv):
 595         # short YouTube ID starting with dash?
 596         idxs = [
 597             i for i, a in enumerate(argv)
 598             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 599         if idxs:
 600             correct_argv = (
 601                 ['yt-dlp']
 602                 + [a for i, a in enumerate(argv) if i not in idxs]
 603                 + ['--'] + [argv[i] for i in idxs]
 604             )
 605             self.report_warning(
 606                 'Long argument string detected. '
 607                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 608                 args_to_str(correct_argv))
 609
 610     def add_info_extractor(self, ie):
 611         """Add an InfoExtractor object to the end of the list."""
 612         self._ies.append(ie)
 613         if not isinstance(ie, type):
 614             self._ies_instances[ie.ie_key()] = ie
 615             ie.set_downloader(self)
 616
 617     def get_info_extractor(self, ie_key):
 618         """
 619         Get an instance of an IE with name ie_key, it will try to get one from
 620         the _ies list, if there's no instance it will create a new one and add
 621         it to the extractor list.
 622         """
 623         ie = self._ies_instances.get(ie_key)
 624         if ie is None:
 625             ie = get_info_extractor(ie_key)()
 626             self.add_info_extractor(ie)
 627         return ie
 628
 629     def add_default_info_extractors(self):
 630         """
 631         Add the InfoExtractors returned by gen_extractors to the end of the list
 632         """
 633         for ie in gen_extractor_classes():
 634             self.add_info_extractor(ie)
 635
 636     def add_post_processor(self, pp, when='post_process'):
 637         """Add a PostProcessor object to the end of the chain."""
 638         self._pps[when].append(pp)
 639         pp.set_downloader(self)
 640
 641     def add_post_hook(self, ph):
 642         """Add the post hook"""
 643         self._post_hooks.append(ph)
 644
 645     def add_progress_hook(self, ph):
 646         """Add the progress hook (currently only for the file downloader)"""
 647         self._progress_hooks.append(ph)
 648
 649     def _bidi_workaround(self, message):
 650         if not hasattr(self, '_output_channel'):
 651             return message
 652
 653         assert hasattr(self, '_output_process')
 654         assert isinstance(message, compat_str)
 655         line_count = message.count('\n') + 1
 656         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 657         self._output_process.stdin.flush()
 658         res = ''.join(self._output_channel.readline().decode('utf-8')
 659                       for _ in range(line_count))
 660         return res[:-len('\n')]
 661
 662     def _write_string(self, s, out=None):
 663         write_string(s, out=out, encoding=self.params.get('encoding'))
 664
 665     def to_stdout(self, message, skip_eol=False, quiet=False):
 666         """Print message to stdout"""
 667         if self.params.get('logger'):
 668             self.params['logger'].debug(message)
 669         elif not quiet or self.params.get('verbose'):
 670             self._write_string(
 671                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 672                 self._err_file if quiet else self._screen_file)
 673
 674     def to_stderr(self, message):
 675         """Print message to stderr"""
 676         assert isinstance(message, compat_str)
 677         if self.params.get('logger'):
 678             self.params['logger'].error(message)
 679         else:
 680             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
 681
 682     def to_console_title(self, message):
 683         if not self.params.get('consoletitle', False):
 684             return
 685         if compat_os_name == 'nt':
 686             if ctypes.windll.kernel32.GetConsoleWindow():
 687                 # c_wchar_p() might not be necessary if `message` is
 688                 # already of type unicode()
 689                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 690         elif 'TERM' in os.environ:
 691             self._write_string('\033]0;%s\007' % message, self._screen_file)
 692
 693     def save_console_title(self):
 694         if not self.params.get('consoletitle', False):
 695             return
 696         if self.params.get('simulate', False):
 697             return
 698         if compat_os_name != 'nt' and 'TERM' in os.environ:
 699             # Save the title on stack
 700             self._write_string('\033[22;0t', self._screen_file)
 701
 702     def restore_console_title(self):
 703         if not self.params.get('consoletitle', False):
 704             return
 705         if self.params.get('simulate', False):
 706             return
 707         if compat_os_name != 'nt' and 'TERM' in os.environ:
 708             # Restore the title from stack
 709             self._write_string('\033[23;0t', self._screen_file)
 710
 711     def __enter__(self):
 712         self.save_console_title()
 713         return self
 714
 715     def __exit__(self, *args):
 716         self.restore_console_title()
 717
 718         if self.params.get('cookiefile') is not None:
 719             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 720
 721     def trouble(self, message=None, tb=None):
 722         """Determine action to take when a download problem appears.
 723
 724         Depending on if the downloader has been configured to ignore
 725         download errors or not, this method may throw an exception or
 726         not when errors are found, after printing the message.
 727
 728         tb, if given, is additional traceback information.
 729         """
 730         if message is not None:
 731             self.to_stderr(message)
 732         if self.params.get('verbose'):
 733             if tb is None:
 734                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 735                     tb = ''
 736                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 737                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 738                     tb += encode_compat_str(traceback.format_exc())
 739                 else:
 740                     tb_data = traceback.format_list(traceback.extract_stack())
 741                     tb = ''.join(tb_data)
 742             if tb:
 743                 self.to_stderr(tb)
 744         if not self.params.get('ignoreerrors', False):
 745             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 746                 exc_info = sys.exc_info()[1].exc_info
 747             else:
 748                 exc_info = sys.exc_info()
 749             raise DownloadError(message, exc_info)
 750         self._download_retcode = 1
 751
 752     def to_screen(self, message, skip_eol=False):
 753         """Print message to stdout if not in quiet mode"""
 754         self.to_stdout(
 755             message, skip_eol, quiet=self.params.get('quiet', False))
 756
 757     def report_warning(self, message):
 758         '''
 759         Print the message to stderr, it will be prefixed with 'WARNING:'
 760         If stderr is a tty file the 'WARNING:' will be colored
 761         '''
 762         if self.params.get('logger') is not None:
 763             self.params['logger'].warning(message)
 764         else:
 765             if self.params.get('no_warnings'):
 766                 return
 767             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 768                 _msg_header = '\033[0;33mWARNING:\033[0m'
 769             else:
 770                 _msg_header = 'WARNING:'
 771             warning_message = '%s %s' % (_msg_header, message)
 772             self.to_stderr(warning_message)
 773
 774     def report_error(self, message, tb=None):
 775         '''
 776         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 777         in red if stderr is a tty file.
 778         '''
 779         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 780             _msg_header = '\033[0;31mERROR:\033[0m'
 781         else:
 782             _msg_header = 'ERROR:'
 783         error_message = '%s %s' % (_msg_header, message)
 784         self.trouble(error_message, tb)
 785
 786     def write_debug(self, message):
 787         '''Log debug message or Print message to stderr'''
 788         if not self.params.get('verbose', False):
 789             return
 790         message = '[debug] %s' % message
 791         if self.params.get('logger'):
 792             self.params['logger'].debug(message)
 793         else:
 794             self._write_string('%s\n' % message)
 795
 796     def report_file_already_downloaded(self, file_name):
 797         """Report file has already been fully downloaded."""
 798         try:
 799             self.to_screen('[download] %s has already been downloaded' % file_name)
 800         except UnicodeEncodeError:
 801             self.to_screen('[download] The file has already been downloaded')
 802
 803     def report_file_delete(self, file_name):
 804         """Report that existing file will be deleted."""
 805         try:
 806             self.to_screen('Deleting existing file %s' % file_name)
 807         except UnicodeEncodeError:
 808             self.to_screen('Deleting existing file')
 809
 810     def parse_outtmpl(self):
 811         outtmpl_dict = self.params.get('outtmpl', {})
 812         if not isinstance(outtmpl_dict, dict):
 813             outtmpl_dict = {'default': outtmpl_dict}
 814         outtmpl_dict.update({
 815             k: v for k, v in DEFAULT_OUTTMPL.items()
 816             if not outtmpl_dict.get(k)})
 817         for key, val in outtmpl_dict.items():
 818             if isinstance(val, bytes):
 819                 self.report_warning(
 820                     'Parameter outtmpl is bytes, but should be a unicode string. '
 821                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 822         return outtmpl_dict
 823
 824     def get_output_path(self, dir_type='', filename=None):
 825         paths = self.params.get('paths', {})
 826         assert isinstance(paths, dict)
 827         path = os.path.join(
 828             expand_path(paths.get('home', '').strip()),
 829             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 830             filename or '')
 831
 832         # Temporary fix for #4787
 833         # 'Treat' all problem characters by passing filename through preferredencoding
 834         # to workaround encoding issues with subprocess on python2 @ Windows
 835         if sys.version_info < (3, 0) and sys.platform == 'win32':
 836             path = encodeFilename(path, True).decode(preferredencoding())
 837         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 838
 839     @staticmethod
 840     def validate_outtmpl(tmpl):
 841         ''' @return None or Exception object '''
 842         try:
 843             re.sub(
 844                 STR_FORMAT_RE.format(''),
 845                 lambda mobj: ('%' if not mobj.group('has_key') else '') + mobj.group(0),
 846                 tmpl
 847             ) % collections.defaultdict(int)
 848             return None
 849         except ValueError as err:
 850             return err
 851
 852     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 853         """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
 854         info_dict = dict(info_dict)
 855         na = self.params.get('outtmpl_na_placeholder', 'NA')
 856
 857         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 858             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
 859             if info_dict.get('duration', None) is not None
 860             else None)
 861         info_dict['epoch'] = int(time.time())
 862         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 863         if info_dict.get('resolution') is None:
 864             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
 865
 866         # For fields playlist_index and autonumber convert all occurrences
 867         # of %(field)s to %(field)0Nd for backward compatibility
 868         field_size_compat_map = {
 869             'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
 870             'autonumber': self.params.get('autonumber_size') or 5,
 871         }
 872
 873         TMPL_DICT = {}
 874         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*'))
 875         MATH_FUNCTIONS = {
 876             '+': float.__add__,
 877             '-': float.__sub__,
 878         }
 879         # Field is of the form key1.key2...
 880         # where keys (except first) can be string, int or slice
 881         FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
 882         MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
 883         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
 884         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
 885             (?P<negate>-)?
 886             (?P<fields>{field})
 887             (?P<maths>(?:{math_op}{math_field})*)
 888             (?:>(?P<strf_format>.+?))?
 889             (?:\|(?P<default>.*?))?
 890             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
 891
 892         get_key = lambda k: traverse_obj(
 893             info_dict, k.split('.'), is_user_input=True, traverse_string=True)
 894
 895         def get_value(mdict):
 896             # Object traversal
 897             value = get_key(mdict['fields'])
 898             # Negative
 899             if mdict['negate']:
 900                 value = float_or_none(value)
 901                 if value is not None:
 902                     value *= -1
 903             # Do maths
 904             offset_key = mdict['maths']
 905             if offset_key:
 906                 value = float_or_none(value)
 907                 operator = None
 908                 while offset_key:
 909                     item = re.match(
 910                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
 911                         offset_key).group(0)
 912                     offset_key = offset_key[len(item):]
 913                     if operator is None:
 914                         operator = MATH_FUNCTIONS[item]
 915                         continue
 916                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
 917                     offset = float_or_none(item)
 918                     if offset is None:
 919                         offset = float_or_none(get_key(item))
 920                     try:
 921                         value = operator(value, multiplier * offset)
 922                     except (TypeError, ZeroDivisionError):
 923                         return None
 924                     operator = None
 925             # Datetime formatting
 926             if mdict['strf_format']:
 927                 value = strftime_or_none(value, mdict['strf_format'])
 928
 929             return value
 930
 931         def create_key(outer_mobj):
 932             if not outer_mobj.group('has_key'):
 933                 return '%{}'.format(outer_mobj.group(0))
 934
 935             key = outer_mobj.group('key')
 936             fmt = outer_mobj.group('format')
 937             mobj = re.match(INTERNAL_FORMAT_RE, key)
 938             if mobj is None:
 939                 value, default, mobj = None, na, {'fields': ''}
 940             else:
 941                 mobj = mobj.groupdict()
 942                 default = mobj['default'] if mobj['default'] is not None else na
 943                 value = get_value(mobj)
 944
 945             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
 946                 fmt = '0{:d}d'.format(field_size_compat_map[key])
 947
 948             value = default if value is None else value
 949
 950             if fmt == 'c':
 951                 value = compat_str(value)
 952                 if value is None:
 953                     value, fmt = default, 's'
 954                 else:
 955                     value = value[0]
 956             elif fmt[-1] not in 'rs':  # numeric
 957                 value = float_or_none(value)
 958                 if value is None:
 959                     value, fmt = default, 's'
 960             if sanitize:
 961                 if fmt[-1] == 'r':
 962                     # If value is an object, sanitize might convert it to a string
 963                     # So we convert it to repr first
 964                     value, fmt = repr(value), '%ss' % fmt[:-1]
 965                 if fmt[-1] in 'csr':
 966                     value = sanitize(mobj['fields'].split('.')[-1], value)
 967             key += '\0%s' % fmt
 968             TMPL_DICT[key] = value
 969             return '%({key}){fmt}'.format(key=key, fmt=fmt)
 970
 971         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
 972
 973     def _prepare_filename(self, info_dict, tmpl_type='default'):
 974         try:
 975             sanitize = lambda k, v: sanitize_filename(
 976                 compat_str(v),
 977                 restricted=self.params.get('restrictfilenames'),
 978                 is_id=(k == 'id' or k.endswith('_id')))
 979             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
 980             outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
 981
 982             # expand_path translates '%%' into '%' and '$$' into '$'
 983             # correspondingly that is not what we want since we need to keep
 984             # '%%' intact for template dict substitution step. Working around
 985             # with boundary-alike separator hack.
 986             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 987             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 988
 989             # outtmpl should be expand_path'ed before template dict substitution
 990             # because meta fields may contain env variables we don't want to
 991             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 992             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 993             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 994
 995             force_ext = OUTTMPL_TYPES.get(tmpl_type)
 996             if force_ext is not None:
 997                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
 998
 999             # https://github.com/blackjack4494/youtube-dlc/issues/85
1000             trim_file_name = self.params.get('trim_file_name', False)
1001             if trim_file_name:
1002                 fn_groups = filename.rsplit('.')
1003                 ext = fn_groups[-1]
1004                 sub_ext = ''
1005                 if len(fn_groups) > 2:
1006                     sub_ext = fn_groups[-2]
1007                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1008
1009             return filename
1010         except ValueError as err:
1011             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1012             return None
1013
1014     def prepare_filename(self, info_dict, dir_type='', warn=False):
1015         """Generate the output filename."""
1016
1017         filename = self._prepare_filename(info_dict, dir_type or 'default')
1018
1019         if warn and not self.__prepare_filename_warned:
1020             if not self.params.get('paths'):
1021                 pass
1022             elif filename == '-':
1023                 self.report_warning('--paths is ignored when an outputting to stdout')
1024             elif os.path.isabs(filename):
1025                 self.report_warning('--paths is ignored since an absolute path is given in output template')
1026             self.__prepare_filename_warned = True
1027         if filename == '-' or not filename:
1028             return filename
1029
1030         return self.get_output_path(dir_type, filename)
1031
1032     def _match_entry(self, info_dict, incomplete=False, silent=False):
1033         """ Returns None if the file should be downloaded """
1034
1035         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1036
1037         def check_filter():
1038             if 'title' in info_dict:
1039                 # This can happen when we're just evaluating the playlist
1040                 title = info_dict['title']
1041                 matchtitle = self.params.get('matchtitle', False)
1042                 if matchtitle:
1043                     if not re.search(matchtitle, title, re.IGNORECASE):
1044                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1045                 rejecttitle = self.params.get('rejecttitle', False)
1046                 if rejecttitle:
1047                     if re.search(rejecttitle, title, re.IGNORECASE):
1048                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1049             date = info_dict.get('upload_date')
1050             if date is not None:
1051                 dateRange = self.params.get('daterange', DateRange())
1052                 if date not in dateRange:
1053                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1054             view_count = info_dict.get('view_count')
1055             if view_count is not None:
1056                 min_views = self.params.get('min_views')
1057                 if min_views is not None and view_count < min_views:
1058                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1059                 max_views = self.params.get('max_views')
1060                 if max_views is not None and view_count > max_views:
1061                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1062             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1063                 return 'Skipping "%s" because it is age restricted' % video_title
1064
1065             if not incomplete:
1066                 match_filter = self.params.get('match_filter')
1067                 if match_filter is not None:
1068                     ret = match_filter(info_dict)
1069                     if ret is not None:
1070                         return ret
1071             return None
1072
1073         if self.in_download_archive(info_dict):
1074             reason = '%s has already been recorded in the archive' % video_title
1075             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1076         else:
1077             reason = check_filter()
1078             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1079         if reason is not None:
1080             if not silent:
1081                 self.to_screen('[download] ' + reason)
1082             if self.params.get(break_opt, False):
1083                 raise break_err()
1084         return reason
1085
1086     @staticmethod
1087     def add_extra_info(info_dict, extra_info):
1088         '''Set the keys from extra_info in info dict if they are missing'''
1089         for key, value in extra_info.items():
1090             info_dict.setdefault(key, value)
1091
1092     def extract_info(self, url, download=True, ie_key=None, extra_info={},
1093                      process=True, force_generic_extractor=False):
1094         """
1095         Return a list with a dictionary for each video extracted.
1096
1097         Arguments:
1098         url -- URL to extract
1099
1100         Keyword arguments:
1101         download -- whether to download videos during extraction
1102         ie_key -- extractor key hint
1103         extra_info -- dictionary containing the extra values to add to each result
1104         process -- whether to resolve all unresolved references (URLs, playlist items),
1105             must be True for download to work.
1106         force_generic_extractor -- force using the generic extractor
1107         """
1108
1109         if not ie_key and force_generic_extractor:
1110             ie_key = 'Generic'
1111
1112         if ie_key:
1113             ies = [self.get_info_extractor(ie_key)]
1114         else:
1115             ies = self._ies
1116
1117         for ie in ies:
1118             if not ie.suitable(url):
1119                 continue
1120
1121             ie_key = ie.ie_key()
1122             ie = self.get_info_extractor(ie_key)
1123             if not ie.working():
1124                 self.report_warning('The program functionality for this site has been marked as broken, '
1125                                     'and will probably not work.')
1126
1127             try:
1128                 temp_id = str_or_none(
1129                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1130                     else ie._match_id(url))
1131             except (AssertionError, IndexError, AttributeError):
1132                 temp_id = None
1133             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1134                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1135                                ie_key, temp_id))
1136                 break
1137             return self.__extract_info(url, ie, download, extra_info, process)
1138         else:
1139             self.report_error('no suitable InfoExtractor for URL %s' % url)
1140
1141     def __handle_extraction_exceptions(func):
1142         def wrapper(self, *args, **kwargs):
1143             try:
1144                 return func(self, *args, **kwargs)
1145             except GeoRestrictedError as e:
1146                 msg = e.msg
1147                 if e.countries:
1148                     msg += '\nThis video is available in %s.' % ', '.join(
1149                         map(ISO3166Utils.short2full, e.countries))
1150                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1151                 self.report_error(msg)
1152             except ExtractorError as e:  # An error we somewhat expected
1153                 self.report_error(compat_str(e), e.format_traceback())
1154             except ThrottledDownload:
1155                 self.to_stderr('\r')
1156                 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1157                 return wrapper(self, *args, **kwargs)
1158             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1159                 raise
1160             except Exception as e:
1161                 if self.params.get('ignoreerrors', False):
1162                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1163                 else:
1164                     raise
1165         return wrapper
1166
1167     @__handle_extraction_exceptions
1168     def __extract_info(self, url, ie, download, extra_info, process):
1169         ie_result = ie.extract(url)
1170         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1171             return
1172         if isinstance(ie_result, list):
1173             # Backwards compatibility: old IE result format
1174             ie_result = {
1175                 '_type': 'compat_list',
1176                 'entries': ie_result,
1177             }
1178         self.add_default_extra_info(ie_result, ie, url)
1179         if process:
1180             return self.process_ie_result(ie_result, download, extra_info)
1181         else:
1182             return ie_result
1183
1184     def add_default_extra_info(self, ie_result, ie, url):
1185         if url is not None:
1186             self.add_extra_info(ie_result, {
1187                 'webpage_url': url,
1188                 'original_url': url,
1189                 'webpage_url_basename': url_basename(url),
1190             })
1191         if ie is not None:
1192             self.add_extra_info(ie_result, {
1193                 'extractor': ie.IE_NAME,
1194                 'extractor_key': ie.ie_key(),
1195             })
1196
1197     def process_ie_result(self, ie_result, download=True, extra_info={}):
1198         """
1199         Take the result of the ie(may be modified) and resolve all unresolved
1200         references (URLs, playlist items).
1201
1202         It will also download the videos if 'download'.
1203         Returns the resolved ie_result.
1204         """
1205         result_type = ie_result.get('_type', 'video')
1206
1207         if result_type in ('url', 'url_transparent'):
1208             ie_result['url'] = sanitize_url(ie_result['url'])
1209             extract_flat = self.params.get('extract_flat', False)
1210             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1211                     or extract_flat is True):
1212                 info_copy = ie_result.copy()
1213                 self.add_extra_info(info_copy, extra_info)
1214                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1215                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1216                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1217                 return ie_result
1218
1219         if result_type == 'video':
1220             self.add_extra_info(ie_result, extra_info)
1221             ie_result = self.process_video_result(ie_result, download=download)
1222             additional_urls = (ie_result or {}).get('additional_urls')
1223             if additional_urls:
1224                 # TODO: Improve MetadataFromFieldPP to allow setting a list
1225                 if isinstance(additional_urls, compat_str):
1226                     additional_urls = [additional_urls]
1227                 self.to_screen(
1228                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1229                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1230                 ie_result['additional_entries'] = [
1231                     self.extract_info(
1232                         url, download, extra_info,
1233                         force_generic_extractor=self.params.get('force_generic_extractor'))
1234                     for url in additional_urls
1235                 ]
1236             return ie_result
1237         elif result_type == 'url':
1238             # We have to add extra_info to the results because it may be
1239             # contained in a playlist
1240             return self.extract_info(
1241                 ie_result['url'], download,
1242                 ie_key=ie_result.get('ie_key'),
1243                 extra_info=extra_info)
1244         elif result_type == 'url_transparent':
1245             # Use the information from the embedding page
1246             info = self.extract_info(
1247                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1248                 extra_info=extra_info, download=False, process=False)
1249
1250             # extract_info may return None when ignoreerrors is enabled and
1251             # extraction failed with an error, don't crash and return early
1252             # in this case
1253             if not info:
1254                 return info
1255
1256             force_properties = dict(
1257                 (k, v) for k, v in ie_result.items() if v is not None)
1258             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1259                 if f in force_properties:
1260                     del force_properties[f]
1261             new_result = info.copy()
1262             new_result.update(force_properties)
1263
1264             # Extracted info may not be a video result (i.e.
1265             # info.get('_type', 'video') != video) but rather an url or
1266             # url_transparent. In such cases outer metadata (from ie_result)
1267             # should be propagated to inner one (info). For this to happen
1268             # _type of info should be overridden with url_transparent. This
1269             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1270             if new_result.get('_type') == 'url':
1271                 new_result['_type'] = 'url_transparent'
1272
1273             return self.process_ie_result(
1274                 new_result, download=download, extra_info=extra_info)
1275         elif result_type in ('playlist', 'multi_video'):
1276             # Protect from infinite recursion due to recursively nested playlists
1277             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1278             webpage_url = ie_result['webpage_url']
1279             if webpage_url in self._playlist_urls:
1280                 self.to_screen(
1281                     '[download] Skipping already downloaded playlist: %s'
1282                     % ie_result.get('title') or ie_result.get('id'))
1283                 return
1284
1285             self._playlist_level += 1
1286             self._playlist_urls.add(webpage_url)
1287             self._sanitize_thumbnails(ie_result)
1288             try:
1289                 return self.__process_playlist(ie_result, download)
1290             finally:
1291                 self._playlist_level -= 1
1292                 if not self._playlist_level:
1293                     self._playlist_urls.clear()
1294         elif result_type == 'compat_list':
1295             self.report_warning(
1296                 'Extractor %s returned a compat_list result. '
1297                 'It needs to be updated.' % ie_result.get('extractor'))
1298
1299             def _fixup(r):
1300                 self.add_extra_info(
1301                     r,
1302                     {
1303                         'extractor': ie_result['extractor'],
1304                         'webpage_url': ie_result['webpage_url'],
1305                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1306                         'extractor_key': ie_result['extractor_key'],
1307                     }
1308                 )
1309                 return r
1310             ie_result['entries'] = [
1311                 self.process_ie_result(_fixup(r), download, extra_info)
1312                 for r in ie_result['entries']
1313             ]
1314             return ie_result
1315         else:
1316             raise Exception('Invalid result type: %s' % result_type)
1317
1318     def _ensure_dir_exists(self, path):
1319         return make_dir(path, self.report_error)
1320
1321     def __process_playlist(self, ie_result, download):
1322         # We process each entry in the playlist
1323         playlist = ie_result.get('title') or ie_result.get('id')
1324         self.to_screen('[download] Downloading playlist: %s' % playlist)
1325
1326         if 'entries' not in ie_result:
1327             raise EntryNotInPlaylist()
1328         incomplete_entries = bool(ie_result.get('requested_entries'))
1329         if incomplete_entries:
1330             def fill_missing_entries(entries, indexes):
1331                 ret = [None] * max(*indexes)
1332                 for i, entry in zip(indexes, entries):
1333                     ret[i - 1] = entry
1334                 return ret
1335             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1336
1337         playlist_results = []
1338
1339         playliststart = self.params.get('playliststart', 1)
1340         playlistend = self.params.get('playlistend')
1341         # For backwards compatibility, interpret -1 as whole list
1342         if playlistend == -1:
1343             playlistend = None
1344
1345         playlistitems_str = self.params.get('playlist_items')
1346         playlistitems = None
1347         if playlistitems_str is not None:
1348             def iter_playlistitems(format):
1349                 for string_segment in format.split(','):
1350                     if '-' in string_segment:
1351                         start, end = string_segment.split('-')
1352                         for item in range(int(start), int(end) + 1):
1353                             yield int(item)
1354                     else:
1355                         yield int(string_segment)
1356             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1357
1358         ie_entries = ie_result['entries']
1359         msg = (
1360             'Downloading %d videos' if not isinstance(ie_entries, list)
1361             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1362         if not isinstance(ie_entries, (list, PagedList)):
1363             ie_entries = LazyList(ie_entries)
1364
1365         entries = []
1366         for i in playlistitems or itertools.count(playliststart):
1367             if playlistitems is None and playlistend is not None and playlistend < i:
1368                 break
1369             entry = None
1370             try:
1371                 entry = ie_entries[i - 1]
1372                 if entry is None:
1373                     raise EntryNotInPlaylist()
1374             except (IndexError, EntryNotInPlaylist):
1375                 if incomplete_entries:
1376                     raise EntryNotInPlaylist()
1377                 elif not playlistitems:
1378                     break
1379             entries.append(entry)
1380             try:
1381                 if entry is not None:
1382                     self._match_entry(entry, incomplete=True, silent=True)
1383             except (ExistingVideoReached, RejectedVideoReached):
1384                 break
1385         ie_result['entries'] = entries
1386
1387         # Save playlist_index before re-ordering
1388         entries = [
1389             ((playlistitems[i - 1] if playlistitems else i), entry)
1390             for i, entry in enumerate(entries, 1)
1391             if entry is not None]
1392         n_entries = len(entries)
1393
1394         if not playlistitems and (playliststart or playlistend):
1395             playlistitems = list(range(playliststart, playliststart + n_entries))
1396         ie_result['requested_entries'] = playlistitems
1397
1398         if self.params.get('allow_playlist_files', True):
1399             ie_copy = {
1400                 'playlist': playlist,
1401                 'playlist_id': ie_result.get('id'),
1402                 'playlist_title': ie_result.get('title'),
1403                 'playlist_uploader': ie_result.get('uploader'),
1404                 'playlist_uploader_id': ie_result.get('uploader_id'),
1405                 'playlist_index': 0,
1406             }
1407             ie_copy.update(dict(ie_result))
1408
1409             if self.params.get('writeinfojson', False):
1410                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1411                 if not self._ensure_dir_exists(encodeFilename(infofn)):
1412                     return
1413                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1414                     self.to_screen('[info] Playlist metadata is already present')
1415                 else:
1416                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1417                     try:
1418                         write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1419                     except (OSError, IOError):
1420                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1421
1422             # TODO: This should be passed to ThumbnailsConvertor if necessary
1423             self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1424
1425             if self.params.get('writedescription', False):
1426                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1427                 if not self._ensure_dir_exists(encodeFilename(descfn)):
1428                     return
1429                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1430                     self.to_screen('[info] Playlist description is already present')
1431                 elif ie_result.get('description') is None:
1432                     self.report_warning('There\'s no playlist description to write.')
1433                 else:
1434                     try:
1435                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1436                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1437                             descfile.write(ie_result['description'])
1438                     except (OSError, IOError):
1439                         self.report_error('Cannot write playlist description file ' + descfn)
1440                         return
1441
1442         if self.params.get('playlistreverse', False):
1443             entries = entries[::-1]
1444         if self.params.get('playlistrandom', False):
1445             random.shuffle(entries)
1446
1447         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1448
1449         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1450         failures = 0
1451         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1452         for i, entry_tuple in enumerate(entries, 1):
1453             playlist_index, entry = entry_tuple
1454             if 'playlist_index' in self.params.get('compat_options', []):
1455                 playlist_index = playlistitems[i - 1] if playlistitems else i
1456             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1457             # This __x_forwarded_for_ip thing is a bit ugly but requires
1458             # minimal changes
1459             if x_forwarded_for:
1460                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1461             extra = {
1462                 'n_entries': n_entries,
1463                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1464                 'playlist_index': playlist_index,
1465                 'playlist_autonumber': i,
1466                 'playlist': playlist,
1467                 'playlist_id': ie_result.get('id'),
1468                 'playlist_title': ie_result.get('title'),
1469                 'playlist_uploader': ie_result.get('uploader'),
1470                 'playlist_uploader_id': ie_result.get('uploader_id'),
1471                 'extractor': ie_result['extractor'],
1472                 'webpage_url': ie_result['webpage_url'],
1473                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1474                 'extractor_key': ie_result['extractor_key'],
1475             }
1476
1477             if self._match_entry(entry, incomplete=True) is not None:
1478                 continue
1479
1480             entry_result = self.__process_iterable_entry(entry, download, extra)
1481             if not entry_result:
1482                 failures += 1
1483             if failures >= max_failures:
1484                 self.report_error(
1485                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1486                 break
1487             # TODO: skip failed (empty) entries?
1488             playlist_results.append(entry_result)
1489         ie_result['entries'] = playlist_results
1490         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1491         return ie_result
1492
1493     @__handle_extraction_exceptions
1494     def __process_iterable_entry(self, entry, download, extra_info):
1495         return self.process_ie_result(
1496             entry, download=download, extra_info=extra_info)
1497
1498     def _build_format_filter(self, filter_spec):
1499         " Returns a function to filter the formats according to the filter_spec "
1500
1501         OPERATORS = {
1502             '<': operator.lt,
1503             '<=': operator.le,
1504             '>': operator.gt,
1505             '>=': operator.ge,
1506             '=': operator.eq,
1507             '!=': operator.ne,
1508         }
1509         operator_rex = re.compile(r'''(?x)\s*
1510             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1511             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1512             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1513             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1514         m = operator_rex.fullmatch(filter_spec)
1515         if m:
1516             try:
1517                 comparison_value = int(m.group('value'))
1518             except ValueError:
1519                 comparison_value = parse_filesize(m.group('value'))
1520                 if comparison_value is None:
1521                     comparison_value = parse_filesize(m.group('value') + 'B')
1522                 if comparison_value is None:
1523                     raise ValueError(
1524                         'Invalid value %r in format specification %r' % (
1525                             m.group('value'), filter_spec))
1526             op = OPERATORS[m.group('op')]
1527
1528         if not m:
1529             STR_OPERATORS = {
1530                 '=': operator.eq,
1531                 '^=': lambda attr, value: attr.startswith(value),
1532                 '$=': lambda attr, value: attr.endswith(value),
1533                 '*=': lambda attr, value: value in attr,
1534             }
1535             str_operator_rex = re.compile(r'''(?x)\s*
1536                 (?P<key>[a-zA-Z0-9._-]+)\s*
1537                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1538                 (?P<value>[a-zA-Z0-9._-]+)\s*
1539                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1540             m = str_operator_rex.fullmatch(filter_spec)
1541             if m:
1542                 comparison_value = m.group('value')
1543                 str_op = STR_OPERATORS[m.group('op')]
1544                 if m.group('negation'):
1545                     op = lambda attr, value: not str_op(attr, value)
1546                 else:
1547                     op = str_op
1548
1549         if not m:
1550             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1551
1552         def _filter(f):
1553             actual_value = f.get(m.group('key'))
1554             if actual_value is None:
1555                 return m.group('none_inclusive')
1556             return op(actual_value, comparison_value)
1557         return _filter
1558
1559     def _default_format_spec(self, info_dict, download=True):
1560
1561         def can_merge():
1562             merger = FFmpegMergerPP(self)
1563             return merger.available and merger.can_merge()
1564
1565         prefer_best = (
1566             not self.params.get('simulate', False)
1567             and download
1568             and (
1569                 not can_merge()
1570                 or info_dict.get('is_live', False)
1571                 or self.outtmpl_dict['default'] == '-'))
1572         compat = (
1573             prefer_best
1574             or self.params.get('allow_multiple_audio_streams', False)
1575             or 'format-spec' in self.params.get('compat_opts', []))
1576
1577         return (
1578             'best/bestvideo+bestaudio' if prefer_best
1579             else 'bestvideo*+bestaudio/best' if not compat
1580             else 'bestvideo+bestaudio/best')
1581
1582     def build_format_selector(self, format_spec):
1583         def syntax_error(note, start):
1584             message = (
1585                 'Invalid format specification: '
1586                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1587             return SyntaxError(message)
1588
1589         PICKFIRST = 'PICKFIRST'
1590         MERGE = 'MERGE'
1591         SINGLE = 'SINGLE'
1592         GROUP = 'GROUP'
1593         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1594
1595         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1596                                   'video': self.params.get('allow_multiple_video_streams', False)}
1597
1598         check_formats = self.params.get('check_formats')
1599
1600         def _parse_filter(tokens):
1601             filter_parts = []
1602             for type, string, start, _, _ in tokens:
1603                 if type == tokenize.OP and string == ']':
1604                     return ''.join(filter_parts)
1605                 else:
1606                     filter_parts.append(string)
1607
1608         def _remove_unused_ops(tokens):
1609             # Remove operators that we don't use and join them with the surrounding strings
1610             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1611             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1612             last_string, last_start, last_end, last_line = None, None, None, None
1613             for type, string, start, end, line in tokens:
1614                 if type == tokenize.OP and string == '[':
1615                     if last_string:
1616                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1617                         last_string = None
1618                     yield type, string, start, end, line
1619                     # everything inside brackets will be handled by _parse_filter
1620                     for type, string, start, end, line in tokens:
1621                         yield type, string, start, end, line
1622                         if type == tokenize.OP and string == ']':
1623                             break
1624                 elif type == tokenize.OP and string in ALLOWED_OPS:
1625                     if last_string:
1626                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1627                         last_string = None
1628                     yield type, string, start, end, line
1629                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1630                     if not last_string:
1631                         last_string = string
1632                         last_start = start
1633                         last_end = end
1634                     else:
1635                         last_string += string
1636             if last_string:
1637                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1638
1639         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1640             selectors = []
1641             current_selector = None
1642             for type, string, start, _, _ in tokens:
1643                 # ENCODING is only defined in python 3.x
1644                 if type == getattr(tokenize, 'ENCODING', None):
1645                     continue
1646                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1647                     current_selector = FormatSelector(SINGLE, string, [])
1648                 elif type == tokenize.OP:
1649                     if string == ')':
1650                         if not inside_group:
1651                             # ')' will be handled by the parentheses group
1652                             tokens.restore_last_token()
1653                         break
1654                     elif inside_merge and string in ['/', ',']:
1655                         tokens.restore_last_token()
1656                         break
1657                     elif inside_choice and string == ',':
1658                         tokens.restore_last_token()
1659                         break
1660                     elif string == ',':
1661                         if not current_selector:
1662                             raise syntax_error('"," must follow a format selector', start)
1663                         selectors.append(current_selector)
1664                         current_selector = None
1665                     elif string == '/':
1666                         if not current_selector:
1667                             raise syntax_error('"/" must follow a format selector', start)
1668                         first_choice = current_selector
1669                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1670                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1671                     elif string == '[':
1672                         if not current_selector:
1673                             current_selector = FormatSelector(SINGLE, 'best', [])
1674                         format_filter = _parse_filter(tokens)
1675                         current_selector.filters.append(format_filter)
1676                     elif string == '(':
1677                         if current_selector:
1678                             raise syntax_error('Unexpected "("', start)
1679                         group = _parse_format_selection(tokens, inside_group=True)
1680                         current_selector = FormatSelector(GROUP, group, [])
1681                     elif string == '+':
1682                         if not current_selector:
1683                             raise syntax_error('Unexpected "+"', start)
1684                         selector_1 = current_selector
1685                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1686                         if not selector_2:
1687                             raise syntax_error('Expected a selector', start)
1688                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1689                     else:
1690                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1691                 elif type == tokenize.ENDMARKER:
1692                     break
1693             if current_selector:
1694                 selectors.append(current_selector)
1695             return selectors
1696
1697         def _merge(formats_pair):
1698             format_1, format_2 = formats_pair
1699
1700             formats_info = []
1701             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1702             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1703
1704             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1705                 get_no_more = {'video': False, 'audio': False}
1706                 for (i, fmt_info) in enumerate(formats_info):
1707                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1708                         formats_info.pop(i)
1709                         continue
1710                     for aud_vid in ['audio', 'video']:
1711                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1712                             if get_no_more[aud_vid]:
1713                                 formats_info.pop(i)
1714                             get_no_more[aud_vid] = True
1715
1716             if len(formats_info) == 1:
1717                 return formats_info[0]
1718
1719             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1720             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1721
1722             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1723             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1724
1725             output_ext = self.params.get('merge_output_format')
1726             if not output_ext:
1727                 if the_only_video:
1728                     output_ext = the_only_video['ext']
1729                 elif the_only_audio and not video_fmts:
1730                     output_ext = the_only_audio['ext']
1731                 else:
1732                     output_ext = 'mkv'
1733
1734             new_dict = {
1735                 'requested_formats': formats_info,
1736                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1737                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1738                 'ext': output_ext,
1739             }
1740
1741             if the_only_video:
1742                 new_dict.update({
1743                     'width': the_only_video.get('width'),
1744                     'height': the_only_video.get('height'),
1745                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1746                     'fps': the_only_video.get('fps'),
1747                     'vcodec': the_only_video.get('vcodec'),
1748                     'vbr': the_only_video.get('vbr'),
1749                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1750                 })
1751
1752             if the_only_audio:
1753                 new_dict.update({
1754                     'acodec': the_only_audio.get('acodec'),
1755                     'abr': the_only_audio.get('abr'),
1756                 })
1757
1758             return new_dict
1759
1760         def _check_formats(formats):
1761             if not check_formats:
1762                 yield from formats
1763             for f in formats:
1764                 self.to_screen('[info] Testing format %s' % f['format_id'])
1765                 temp_file = tempfile.NamedTemporaryFile(
1766                     suffix='.tmp', delete=False,
1767                     dir=self.get_output_path('temp') or None)
1768                 temp_file.close()
1769                 try:
1770                     success, _ = self.dl(temp_file.name, f, test=True)
1771                 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1772                     success = False
1773                 finally:
1774                     if os.path.exists(temp_file.name):
1775                         try:
1776                             os.remove(temp_file.name)
1777                         except OSError:
1778                             self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1779                 if success:
1780                     yield f
1781                 else:
1782                     self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1783
1784         def _build_selector_function(selector):
1785             if isinstance(selector, list):  # ,
1786                 fs = [_build_selector_function(s) for s in selector]
1787
1788                 def selector_function(ctx):
1789                     for f in fs:
1790                         yield from f(ctx)
1791                 return selector_function
1792
1793             elif selector.type == GROUP:  # ()
1794                 selector_function = _build_selector_function(selector.selector)
1795
1796             elif selector.type == PICKFIRST:  # /
1797                 fs = [_build_selector_function(s) for s in selector.selector]
1798
1799                 def selector_function(ctx):
1800                     for f in fs:
1801                         picked_formats = list(f(ctx))
1802                         if picked_formats:
1803                             return picked_formats
1804                     return []
1805
1806             elif selector.type == MERGE:  # +
1807                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1808
1809                 def selector_function(ctx):
1810                     for pair in itertools.product(
1811                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1812                         yield _merge(pair)
1813
1814             elif selector.type == SINGLE:  # atom
1815                 format_spec = selector.selector or 'best'
1816
1817                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1818                 if format_spec == 'all':
1819                     def selector_function(ctx):
1820                         yield from _check_formats(ctx['formats'])
1821                 elif format_spec == 'mergeall':
1822                     def selector_function(ctx):
1823                         formats = list(_check_formats(ctx['formats']))
1824                         if not formats:
1825                             return
1826                         merged_format = formats[-1]
1827                         for f in formats[-2::-1]:
1828                             merged_format = _merge((merged_format, f))
1829                         yield merged_format
1830
1831                 else:
1832                     format_fallback, format_reverse, format_idx = False, True, 1
1833                     mobj = re.match(
1834                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1835                         format_spec)
1836                     if mobj is not None:
1837                         format_idx = int_or_none(mobj.group('n'), default=1)
1838                         format_reverse = mobj.group('bw')[0] == 'b'
1839                         format_type = (mobj.group('type') or [None])[0]
1840                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1841                         format_modified = mobj.group('mod') is not None
1842
1843                         format_fallback = not format_type and not format_modified  # for b, w
1844                         _filter_f = (
1845                             (lambda f: f.get('%scodec' % format_type) != 'none')
1846                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1847                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1848                             if format_type  # bv, ba, wv, wa
1849                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1850                             if not format_modified  # b, w
1851                             else lambda f: True)  # b*, w*
1852                         filter_f = lambda f: _filter_f(f) and (
1853                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
1854                     else:
1855                         filter_f = ((lambda f: f.get('ext') == format_spec)
1856                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1857                                     else (lambda f: f.get('format_id') == format_spec))  # id
1858
1859                     def selector_function(ctx):
1860                         formats = list(ctx['formats'])
1861                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1862                         if format_fallback and ctx['incomplete_formats'] and not matches:
1863                             # for extractors with incomplete formats (audio only (soundcloud)
1864                             # or video only (imgur)) best/worst will fallback to
1865                             # best/worst {video,audio}-only format
1866                             matches = formats
1867                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
1868                         try:
1869                             yield matches[format_idx - 1]
1870                         except IndexError:
1871                             return
1872
1873             filters = [self._build_format_filter(f) for f in selector.filters]
1874
1875             def final_selector(ctx):
1876                 ctx_copy = copy.deepcopy(ctx)
1877                 for _filter in filters:
1878                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1879                 return selector_function(ctx_copy)
1880             return final_selector
1881
1882         stream = io.BytesIO(format_spec.encode('utf-8'))
1883         try:
1884             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1885         except tokenize.TokenError:
1886             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1887
1888         class TokenIterator(object):
1889             def __init__(self, tokens):
1890                 self.tokens = tokens
1891                 self.counter = 0
1892
1893             def __iter__(self):
1894                 return self
1895
1896             def __next__(self):
1897                 if self.counter >= len(self.tokens):
1898                     raise StopIteration()
1899                 value = self.tokens[self.counter]
1900                 self.counter += 1
1901                 return value
1902
1903             next = __next__
1904
1905             def restore_last_token(self):
1906                 self.counter -= 1
1907
1908         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1909         return _build_selector_function(parsed_selector)
1910
1911     def _calc_headers(self, info_dict):
1912         res = std_headers.copy()
1913
1914         add_headers = info_dict.get('http_headers')
1915         if add_headers:
1916             res.update(add_headers)
1917
1918         cookies = self._calc_cookies(info_dict)
1919         if cookies:
1920             res['Cookie'] = cookies
1921
1922         if 'X-Forwarded-For' not in res:
1923             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1924             if x_forwarded_for_ip:
1925                 res['X-Forwarded-For'] = x_forwarded_for_ip
1926
1927         return res
1928
1929     def _calc_cookies(self, info_dict):
1930         pr = sanitized_Request(info_dict['url'])
1931         self.cookiejar.add_cookie_header(pr)
1932         return pr.get_header('Cookie')
1933
1934     def _sanitize_thumbnails(self, info_dict):
1935         thumbnails = info_dict.get('thumbnails')
1936         if thumbnails is None:
1937             thumbnail = info_dict.get('thumbnail')
1938             if thumbnail:
1939                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1940         if thumbnails:
1941             thumbnails.sort(key=lambda t: (
1942                 t.get('preference') if t.get('preference') is not None else -1,
1943                 t.get('width') if t.get('width') is not None else -1,
1944                 t.get('height') if t.get('height') is not None else -1,
1945                 t.get('id') if t.get('id') is not None else '',
1946                 t.get('url')))
1947
1948             def test_thumbnail(t):
1949                 self.to_screen('[info] Testing thumbnail %s' % t['id'])
1950                 try:
1951                     self.urlopen(HEADRequest(t['url']))
1952                 except network_exceptions as err:
1953                     self.to_screen('[info] Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
1954                         t['id'], t['url'], error_to_compat_str(err)))
1955                     return False
1956                 return True
1957
1958             for i, t in enumerate(thumbnails):
1959                 if t.get('id') is None:
1960                     t['id'] = '%d' % i
1961                 if t.get('width') and t.get('height'):
1962                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1963                 t['url'] = sanitize_url(t['url'])
1964             if self.params.get('check_formats'):
1965                 info_dict['thumbnails'] = LazyList(filter(test_thumbnail, thumbnails[::-1])).reverse()
1966
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video result, select its formats and process them

        info_dict is modified in place: 'id' and the numeric fields are
        coerced to the expected types, thumbnails, dates, chapter/season/
        episode titles and subtitles are normalised, every format gets a
        unique 'format_id' plus 'format', 'ext', 'protocol' and
        'http_headers', and 'requested_subtitles' is filled in.

        Afterwards the formats are chosen with self.format_selector (or the
        default format spec when it is None) and, if download is true, each
        selected format is passed to process_info.

        Returns the info dict updated with the last selected format, or None
        when only a listing ('list_thumbnails', 'listformats',
        'listsubtitles') was requested.

        Raises ExtractorError if 'id' or 'title' is missing, or if there are
        no formats / no format matches the selector and the
        'ignore_no_formats_error' param is not set.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # Warn that an extractor returned a field of the wrong type
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Convert info[string_field] to compat_str in place (with a warning)
            # unless it is missing, None or already a string
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Pass every non-numeric value of the fields in self._NUMERIC_FIELDS
            # through int_or_none in place (with a warning)
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Prefer an explicit 'thumbnail'; otherwise fall back to the last
        # entry of the thumbnail list sorted by _sanitize_thumbnails
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive the YYYYMMDD date fields from their timestamps when missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize the URL of every subtitle/caption format and guess a missing
        # extension from that URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            # NOTE(review): this indexes 'url' directly, so an entry
                            # with neither 'ext' nor 'url' raises KeyError here
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        # Kept in locals: they are also needed by the 'listsubtitles' branch below
        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('No video formats found!')
            else:
                self.report_warning('No video formats found!')

        def is_wellformed(f):
            # A format is usable only if it has a non-empty 'url';
            # a bytes URL is converted to a string in place
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the list of formats sharing it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                # Fall back to the position of the format in the list
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                # Disambiguate by appending the index within the colliding group
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                # Human readable name: "<format_id> - <resolution>[ (<format_note>)]"
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats and formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        # pre_process returns a pair; only its first element (the info dict) is used
        info_dict, _ = self.pre_process(info_dict)

        list_only = self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')
        if list_only:
            # Only print the requested listings; no format is selected or
            # downloaded and the method returns None
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            if self.params.get('list_thumbnails'):
                self.list_thumbnails(info_dict)
            if self.params.get('listformats'):
                if not info_dict.get('formats'):
                    raise ExtractorError('No video formats found', expected=True)
                self.list_formats(info_dict)
            if self.params.get('listsubtitles'):
                if 'automatic_captions' in info_dict:
                    self.list_subtitles(
                        info_dict['id'], automatic_captions, 'automatic captions')
                self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        format_selector = self.format_selector
        if format_selector is None:
            # No selector was set: build one from the default format spec
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True)
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                # Note that this branch does not depend on the download argument
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                # Each selected format is processed as its own shallow copy of
                # the info dict with the fields of the format merged in
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2205
2206     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2207         """Select the requested subtitles and their format"""
2208         available_subs = {}
2209         if normal_subtitles and self.params.get('writesubtitles'):
2210             available_subs.update(normal_subtitles)
2211         if automatic_captions and self.params.get('writeautomaticsub'):
2212             for lang, cap_info in automatic_captions.items():
2213                 if lang not in available_subs:
2214                     available_subs[lang] = cap_info
2215
2216         if (not self.params.get('writesubtitles') and not
2217                 self.params.get('writeautomaticsub') or not
2218                 available_subs):
2219             return None
2220
2221         all_sub_langs = available_subs.keys()
2222         if self.params.get('allsubtitles', False):
2223             requested_langs = all_sub_langs
2224         elif self.params.get('subtitleslangs', False):
2225             requested_langs = set()
2226             for lang in self.params.get('subtitleslangs'):
2227                 if lang == 'all':
2228                     requested_langs.update(all_sub_langs)
2229                     continue
2230                 discard = lang[0] == '-'
2231                 if discard:
2232                     lang = lang[1:]
2233                 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2234                 if discard:
2235                     for lang in current_langs:
2236                         requested_langs.discard(lang)
2237                 else:
2238                     requested_langs.update(current_langs)
2239         elif 'en' in available_subs:
2240             requested_langs = ['en']
2241         else:
2242             requested_langs = [list(all_sub_langs)[0]]
2243         self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2244
2245         formats_query = self.params.get('subtitlesformat', 'best')
2246         formats_preference = formats_query.split('/') if formats_query else []
2247         subs = {}
2248         for lang in requested_langs:
2249             formats = available_subs.get(lang)
2250             if formats is None:
2251                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2252                 continue
2253             for ext in formats_preference:
2254                 if ext == 'best':
2255                     f = formats[-1]
2256                     break
2257                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2258                 if matches:
2259                     f = matches[-1]
2260                     break
2261             else:
2262                 f = formats[-1]
2263                 self.report_warning(
2264                     'No subtitle format found matching "%s" for language %s, '
2265                     'using %s' % (formats_query, lang, f['ext']))
2266             subs[lang] = f
2267         return subs
2268
2269     def __forced_printings(self, info_dict, filename, incomplete):
2270         def print_mandatory(field, actual_field=None):
2271             if actual_field is None:
2272                 actual_field = field
2273             if (self.params.get('force%s' % field, False)
2274                     and (not incomplete or info_dict.get(actual_field) is not None)):
2275                 self.to_stdout(info_dict[actual_field])
2276
2277         def print_optional(field):
2278             if (self.params.get('force%s' % field, False)
2279                     and info_dict.get(field) is not None):
2280                 self.to_stdout(info_dict[field])
2281
2282         info_dict = info_dict.copy()
2283         if filename is not None:
2284             info_dict['filename'] = filename
2285         if info_dict.get('requested_formats') is not None:
2286             # For RTMP URLs, also include the playpath
2287             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2288         elif 'url' in info_dict:
2289             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2290
2291         for tmpl in self.params.get('forceprint', []):
2292             if re.match(r'\w+$', tmpl):
2293                 tmpl = '%({})s'.format(tmpl)
2294             tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2295             self.to_stdout(tmpl % info_copy)
2296
2297         print_mandatory('title')
2298         print_mandatory('id')
2299         print_mandatory('url', 'urls')
2300         print_optional('thumbnail')
2301         print_optional('description')
2302         print_optional('filename')
2303         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2304             self.to_stdout(formatSeconds(info_dict['duration']))
2305         print_mandatory('format')
2306
2307         if self.params.get('forcejson', False):
2308             self.post_extract(info_dict)
2309             self.to_stdout(json.dumps(info_dict, default=repr))
2310
2311     def dl(self, name, info, subtitle=False, test=False):
2312
2313         if test:
2314             verbose = self.params.get('verbose')
2315             params = {
2316                 'test': True,
2317                 'quiet': not verbose,
2318                 'verbose': verbose,
2319                 'noprogress': not verbose,
2320                 'nopart': True,
2321                 'skip_unavailable_fragments': False,
2322                 'keep_fragments': False,
2323                 'overwrites': True,
2324                 '_no_ytdl_file': True,
2325             }
2326         else:
2327             params = self.params
2328         fd = get_suitable_downloader(info, params)(self, params)
2329         if not test:
2330             for ph in self._progress_hooks:
2331                 fd.add_progress_hook(ph)
2332             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2333             self.write_debug('Invoking downloader on "%s"' % urls)
2334         new_info = dict(info)
2335         if new_info.get('http_headers') is None:
2336             new_info['http_headers'] = self._calc_headers(new_info)
2337         return fd.download(name, new_info, subtitle)
2338
2339     def process_info(self, info_dict):
2340         """Process a single resolved IE result."""
2341
2342         assert info_dict.get('_type', 'video') == 'video'
2343
2344         info_dict.setdefault('__postprocessors', [])
2345
2346         max_downloads = self.params.get('max_downloads')
2347         if max_downloads is not None:
2348             if self._num_downloads >= int(max_downloads):
2349                 raise MaxDownloadsReached()
2350
2351         # TODO: backward compatibility, to be removed
2352         info_dict['fulltitle'] = info_dict['title']
2353
2354         if 'format' not in info_dict and 'ext' in info_dict:
2355             info_dict['format'] = info_dict['ext']
2356
2357         if self._match_entry(info_dict) is not None:
2358             return
2359
2360         self.post_extract(info_dict)
2361         self._num_downloads += 1
2362
2363         # info_dict['_filename'] needs to be set for backward compatibility
2364         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2365         temp_filename = self.prepare_filename(info_dict, 'temp')
2366         files_to_move = {}
2367
2368         # Forced printings
2369         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2370
2371         if self.params.get('simulate', False):
2372             if self.params.get('force_write_download_archive', False):
2373                 self.record_download_archive(info_dict)
2374
2375             # Do nothing else if in simulate mode
2376             return
2377
2378         if full_filename is None:
2379             return
2380
2381         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2382             return
2383         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2384             return
2385
2386         if self.params.get('writedescription', False):
2387             descfn = self.prepare_filename(info_dict, 'description')
2388             if not self._ensure_dir_exists(encodeFilename(descfn)):
2389                 return
2390             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2391                 self.to_screen('[info] Video description is already present')
2392             elif info_dict.get('description') is None:
2393                 self.report_warning('There\'s no description to write.')
2394             else:
2395                 try:
2396                     self.to_screen('[info] Writing video description to: ' + descfn)
2397                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2398                         descfile.write(info_dict['description'])
2399                 except (OSError, IOError):
2400                     self.report_error('Cannot write description file ' + descfn)
2401                     return
2402
2403         if self.params.get('writeannotations', False):
2404             annofn = self.prepare_filename(info_dict, 'annotation')
2405             if not self._ensure_dir_exists(encodeFilename(annofn)):
2406                 return
2407             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2408                 self.to_screen('[info] Video annotations are already present')
2409             elif not info_dict.get('annotations'):
2410                 self.report_warning('There are no annotations to write.')
2411             else:
2412                 try:
2413                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2414                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2415                         annofile.write(info_dict['annotations'])
2416                 except (KeyError, TypeError):
2417                     self.report_warning('There are no annotations to write.')
2418                 except (OSError, IOError):
2419                     self.report_error('Cannot write annotations file: ' + annofn)
2420                     return
2421
2422         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2423                                        self.params.get('writeautomaticsub')])
2424
2425         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2426             # subtitles download errors are already managed as troubles in relevant IE
2427             # that way it will silently go on when used with unsupporting IE
2428             subtitles = info_dict['requested_subtitles']
2429             # ie = self.get_info_extractor(info_dict['extractor_key'])
2430             for sub_lang, sub_info in subtitles.items():
2431                 sub_format = sub_info['ext']
2432                 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2433                 sub_filename_final = subtitles_filename(
2434                     self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2435                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2436                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2437                     sub_info['filepath'] = sub_filename
2438                     files_to_move[sub_filename] = sub_filename_final
2439                 else:
2440                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2441                     if sub_info.get('data') is not None:
2442                         try:
2443                             # Use newline='' to prevent conversion of newline characters
2444                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2445                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2446                                 subfile.write(sub_info['data'])
2447                             sub_info['filepath'] = sub_filename
2448                             files_to_move[sub_filename] = sub_filename_final
2449                         except (OSError, IOError):
2450                             self.report_error('Cannot write subtitles file ' + sub_filename)
2451                             return
2452                     else:
2453                         try:
2454                             self.dl(sub_filename, sub_info.copy(), subtitle=True)
2455                             sub_info['filepath'] = sub_filename
2456                             files_to_move[sub_filename] = sub_filename_final
2457                         except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2458                             self.report_warning('Unable to download subtitle for "%s": %s' %
2459                                                 (sub_lang, error_to_compat_str(err)))
2460                             continue
2461
2462         if self.params.get('writeinfojson', False):
2463             infofn = self.prepare_filename(info_dict, 'infojson')
2464             if not self._ensure_dir_exists(encodeFilename(infofn)):
2465                 return
2466             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2467                 self.to_screen('[info] Video metadata is already present')
2468             else:
2469                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2470                 try:
2471                     write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2472                 except (OSError, IOError):
2473                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2474                     return
2475             info_dict['__infojson_filename'] = infofn
2476
2477         for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2478             thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2479             thumb_filename = replace_extension(
2480                 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2481             files_to_move[thumb_filename_temp] = thumb_filename
2482
2483         # Write internet shortcut files
2484         url_link = webloc_link = desktop_link = False
2485         if self.params.get('writelink', False):
2486             if sys.platform == "darwin":  # macOS.
2487                 webloc_link = True
2488             elif sys.platform.startswith("linux"):
2489                 desktop_link = True
2490             else:  # if sys.platform in ['win32', 'cygwin']:
2491                 url_link = True
2492         if self.params.get('writeurllink', False):
2493             url_link = True
2494         if self.params.get('writewebloclink', False):
2495             webloc_link = True
2496         if self.params.get('writedesktoplink', False):
2497             desktop_link = True
2498
2499         if url_link or webloc_link or desktop_link:
2500             if 'webpage_url' not in info_dict:
2501                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2502                 return
2503             ascii_url = iri_to_uri(info_dict['webpage_url'])
2504
2505         def _write_link_file(extension, template, newline, embed_filename):
2506             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2507             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2508                 self.to_screen('[info] Internet shortcut is already present')
2509             else:
2510                 try:
2511                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2512                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2513                         template_vars = {'url': ascii_url}
2514                         if embed_filename:
2515                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2516                         linkfile.write(template % template_vars)
2517                 except (OSError, IOError):
2518                     self.report_error('Cannot write internet shortcut ' + linkfn)
2519                     return False
2520             return True
2521
2522         if url_link:
2523             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2524                 return
2525         if webloc_link:
2526             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2527                 return
2528         if desktop_link:
2529             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2530                 return
2531
2532         try:
2533             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2534         except PostProcessingError as err:
2535             self.report_error('Preprocessing: %s' % str(err))
2536             return
2537
2538         must_record_download_archive = False
2539         if self.params.get('skip_download', False):
2540             info_dict['filepath'] = temp_filename
2541             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2542             info_dict['__files_to_move'] = files_to_move
2543             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2544         else:
2545             # Download
2546             try:
2547
2548                 def existing_file(*filepaths):
2549                     ext = info_dict.get('ext')
2550                     final_ext = self.params.get('final_ext', ext)
2551                     existing_files = []
2552                     for file in orderedSet(filepaths):
2553                         if final_ext != ext:
2554                             converted = replace_extension(file, final_ext, ext)
2555                             if os.path.exists(encodeFilename(converted)):
2556                                 existing_files.append(converted)
2557                         if os.path.exists(encodeFilename(file)):
2558                             existing_files.append(file)
2559
2560                     if not existing_files or self.params.get('overwrites', False):
2561                         for file in orderedSet(existing_files):
2562                             self.report_file_delete(file)
2563                             os.remove(encodeFilename(file))
2564                         return None
2565
2566                     self.report_file_already_downloaded(existing_files[0])
2567                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2568                     return existing_files[0]
2569
2570                 success = True
2571                 if info_dict.get('requested_formats') is not None:
2572
2573                     def compatible_formats(formats):
2574                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2575                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2576                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2577                         if len(video_formats) > 2 or len(audio_formats) > 2:
2578                             return False
2579
2580                         # Check extension
2581                         exts = set(format.get('ext') for format in formats)
2582                         COMPATIBLE_EXTS = (
2583                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2584                             set(('webm',)),
2585                         )
2586                         for ext_sets in COMPATIBLE_EXTS:
2587                             if ext_sets.issuperset(exts):
2588                                 return True
2589                         # TODO: Check acodec/vcodec
2590                         return False
2591
2592                     requested_formats = info_dict['requested_formats']
2593                     old_ext = info_dict['ext']
2594                     if self.params.get('merge_output_format') is None:
2595                         if not compatible_formats(requested_formats):
2596                             info_dict['ext'] = 'mkv'
2597                             self.report_warning(
2598                                 'Requested formats are incompatible for merge and will be merged into mkv.')
2599                         if (info_dict['ext'] == 'webm'
2600                                 and self.params.get('writethumbnail', False)
2601                                 and info_dict.get('thumbnails')):
2602                             info_dict['ext'] = 'mkv'
2603                             self.report_warning(
2604                                 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
2605
2606                     def correct_ext(filename):
2607                         filename_real_ext = os.path.splitext(filename)[1][1:]
2608                         filename_wo_ext = (
2609                             os.path.splitext(filename)[0]
2610                             if filename_real_ext == old_ext
2611                             else filename)
2612                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2613
2614                     # Ensure filename always has a correct extension for successful merge
2615                     full_filename = correct_ext(full_filename)
2616                     temp_filename = correct_ext(temp_filename)
2617                     dl_filename = existing_file(full_filename, temp_filename)
2618                     info_dict['__real_download'] = False
2619
2620                     _protocols = set(determine_protocol(f) for f in requested_formats)
2621                     if len(_protocols) == 1:
2622                         info_dict['protocol'] = _protocols.pop()
2623                     directly_mergable = (
2624                         'no-direct-merge' not in self.params.get('compat_opts', [])
2625                         and info_dict.get('protocol') is not None  # All requested formats have same protocol
2626                         and not self.params.get('allow_unplayable_formats')
2627                         and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2628                     if directly_mergable:
2629                         info_dict['url'] = requested_formats[0]['url']
2630                         # Treat it as a single download
2631                         dl_filename = existing_file(full_filename, temp_filename)
2632                         if dl_filename is None:
2633                             success, real_download = self.dl(temp_filename, info_dict)
2634                             info_dict['__real_download'] = real_download
2635                     else:
2636                         downloaded = []
2637                         merger = FFmpegMergerPP(self)
2638                         if self.params.get('allow_unplayable_formats'):
2639                             self.report_warning(
2640                                 'You have requested merging of multiple formats '
2641                                 'while also allowing unplayable formats to be downloaded. '
2642                                 'The formats won\'t be merged to prevent data corruption.')
2643                         elif not merger.available:
2644                             self.report_warning(
2645                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2646                                 'The formats won\'t be merged.')
2647
2648                         if dl_filename is None:
2649                             for f in requested_formats:
2650                                 new_info = dict(info_dict)
2651                                 del new_info['requested_formats']
2652                                 new_info.update(f)
2653                                 fname = prepend_extension(
2654                                     self.prepare_filename(new_info, 'temp'),
2655                                     'f%s' % f['format_id'], new_info['ext'])
2656                                 if not self._ensure_dir_exists(fname):
2657                                     return
2658                                 downloaded.append(fname)
2659                                 partial_success, real_download = self.dl(fname, new_info)
2660                                 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2661                                 success = success and partial_success
2662                             if merger.available and not self.params.get('allow_unplayable_formats'):
2663                                 info_dict['__postprocessors'].append(merger)
2664                                 info_dict['__files_to_merge'] = downloaded
2665                                 # Even if there were no downloads, it is being merged only now
2666                                 info_dict['__real_download'] = True
2667                             else:
2668                                 for file in downloaded:
2669                                     files_to_move[file] = None
2670                 else:
2671                     # Just a single file
2672                     dl_filename = existing_file(full_filename, temp_filename)
2673                     if dl_filename is None:
2674                         success, real_download = self.dl(temp_filename, info_dict)
2675                         info_dict['__real_download'] = real_download
2676
2677                 dl_filename = dl_filename or temp_filename
2678                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2679
2680             except network_exceptions as err:
2681                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2682                 return
2683             except (OSError, IOError) as err:
2684                 raise UnavailableVideoError(err)
2685             except (ContentTooShortError, ) as err:
2686                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2687                 return
2688
2689             if success and full_filename != '-':
2690
2691                 def fixup():
2692                     do_fixup = True
2693                     fixup_policy = self.params.get('fixup')
2694                     vid = info_dict['id']
2695
2696                     if fixup_policy in ('ignore', 'never'):
2697                         return
2698                     elif fixup_policy == 'warn':
2699                         do_fixup = False
2700                     elif fixup_policy != 'force':
2701                         assert fixup_policy in ('detect_or_warn', None)
2702                         if not info_dict.get('__real_download'):
2703                             do_fixup = False
2704
2705                     def ffmpeg_fixup(cndn, msg, cls):
2706                         if not cndn:
2707                             return
2708                         if not do_fixup:
2709                             self.report_warning(f'{vid}: {msg}')
2710                             return
2711                         pp = cls(self)
2712                         if pp.available:
2713                             info_dict['__postprocessors'].append(pp)
2714                         else:
2715                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2716
2717                     stretched_ratio = info_dict.get('stretched_ratio')
2718                     ffmpeg_fixup(
2719                         stretched_ratio not in (1, None),
2720                         f'Non-uniform pixel ratio {stretched_ratio}',
2721                         FFmpegFixupStretchedPP)
2722
2723                     ffmpeg_fixup(
2724                         (info_dict.get('requested_formats') is None
2725                          and info_dict.get('container') == 'm4a_dash'
2726                          and info_dict.get('ext') == 'm4a'),
2727                         'writing DASH m4a. Only some players support this container',
2728                         FFmpegFixupM4aPP)
2729
2730                     downloader = (get_suitable_downloader(info_dict, self.params).__name__
2731                                   if 'protocol' in info_dict else None)
2732                     ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2733                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2734                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2735
2736                 fixup()
2737                 try:
2738                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2739                 except PostProcessingError as err:
2740                     self.report_error('Postprocessing: %s' % str(err))
2741                     return
2742                 try:
2743                     for ph in self._post_hooks:
2744                         ph(info_dict['filepath'])
2745                 except Exception as err:
2746                     self.report_error('post hooks: %s' % str(err))
2747                     return
2748                 must_record_download_archive = True
2749
2750         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2751             self.record_download_archive(info_dict)
2752         max_downloads = self.params.get('max_downloads')
2753         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2754             raise MaxDownloadsReached()
2755
2756     def download(self, url_list):
2757         """Download a given list of URLs."""
2758         outtmpl = self.outtmpl_dict['default']
2759         if (len(url_list) > 1
2760                 and outtmpl != '-'
2761                 and '%' not in outtmpl
2762                 and self.params.get('max_downloads') != 1):
2763             raise SameFileError(outtmpl)
2764
2765         for url in url_list:
2766             try:
2767                 # It also downloads the videos
2768                 res = self.extract_info(
2769                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2770             except UnavailableVideoError:
2771                 self.report_error('unable to download video')
2772             except MaxDownloadsReached:
2773                 self.to_screen('[info] Maximum number of downloaded files reached')
2774                 raise
2775             except ExistingVideoReached:
2776                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2777                 raise
2778             except RejectedVideoReached:
2779                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2780                 raise
2781             else:
2782                 if self.params.get('dump_single_json', False):
2783                     self.post_extract(res)
2784                     self.to_stdout(json.dumps(res, default=repr))
2785
2786         return self._download_retcode
2787
2788     def download_with_info_file(self, info_filename):
2789         with contextlib.closing(fileinput.FileInput(
2790                 [info_filename], mode='r',
2791                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2792             # FileInput doesn't have a read method, we can't call json.load
2793             info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2794         try:
2795             self.process_ie_result(info, download=True)
2796         except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2797             webpage_url = info.get('webpage_url')
2798             if webpage_url is not None:
2799                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2800                 return self.download([webpage_url])
2801             else:
2802                 raise
2803         return self._download_retcode
2804
2805     @staticmethod
2806     def filter_requested_info(info_dict, actually_filter=True):
2807         remove_keys = ['__original_infodict']  # Always remove this since this may contain a copy of the entire dict
2808         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
2809         if actually_filter:
2810             remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2811             empty_values = (None, {}, [], set(), tuple())
2812             reject = lambda k, v: k not in keep_keys and (
2813                 k.startswith('_') or k in remove_keys or v in empty_values)
2814         else:
2815             info_dict['epoch'] = int(time.time())
2816             reject = lambda k, v: k in remove_keys
2817         filter_fn = lambda obj: (
2818             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2819             else obj if not isinstance(obj, dict)
2820             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2821         return filter_fn(info_dict)
2822
2823     def run_pp(self, pp, infodict):
2824         files_to_delete = []
2825         if '__files_to_move' not in infodict:
2826             infodict['__files_to_move'] = {}
2827         files_to_delete, infodict = pp.run(infodict)
2828         if not files_to_delete:
2829             return infodict
2830
2831         if self.params.get('keepvideo', False):
2832             for f in files_to_delete:
2833                 infodict['__files_to_move'].setdefault(f, '')
2834         else:
2835             for old_filename in set(files_to_delete):
2836                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2837                 try:
2838                     os.remove(encodeFilename(old_filename))
2839                 except (IOError, OSError):
2840                     self.report_warning('Unable to remove downloaded original file')
2841                 if old_filename in infodict['__files_to_move']:
2842                     del infodict['__files_to_move'][old_filename]
2843         return infodict
2844
2845     @staticmethod
2846     def post_extract(info_dict):
2847         def actual_post_extract(info_dict):
2848             if info_dict.get('_type') in ('playlist', 'multi_video'):
2849                 for video_dict in info_dict.get('entries', {}):
2850                     actual_post_extract(video_dict or {})
2851                 return
2852
2853             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2854             extra = post_extractor().items()
2855             info_dict.update(extra)
2856             info_dict.pop('__post_extractor', None)
2857
2858             original_infodict = info_dict.get('__original_infodict') or {}
2859             original_infodict.update(extra)
2860             original_infodict.pop('__post_extractor', None)
2861
2862         actual_post_extract(info_dict or {})
2863
2864     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2865         info = dict(ie_info)
2866         info['__files_to_move'] = files_to_move or {}
2867         for pp in self._pps[key]:
2868             info = self.run_pp(pp, info)
2869         return info, info.pop('__files_to_move', None)
2870
2871     def post_process(self, filename, ie_info, files_to_move=None):
2872         """Run all the postprocessors on the given file."""
2873         info = dict(ie_info)
2874         info['filepath'] = filename
2875         info['__files_to_move'] = files_to_move or {}
2876
2877         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2878             info = self.run_pp(pp, info)
2879         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2880         del info['__files_to_move']
2881         for pp in self._pps['after_move']:
2882             info = self.run_pp(pp, info)
2883         return info
2884
2885     def _make_archive_id(self, info_dict):
2886         video_id = info_dict.get('id')
2887         if not video_id:
2888             return
2889         # Future-proof against any change in case
2890         # and backwards compatibility with prior versions
2891         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2892         if extractor is None:
2893             url = str_or_none(info_dict.get('url'))
2894             if not url:
2895                 return
2896             # Try to find matching extractor for the URL and take its ie_key
2897             for ie in self._ies:
2898                 if ie.suitable(url):
2899                     extractor = ie.ie_key()
2900                     break
2901             else:
2902                 return
2903         return '%s %s' % (extractor.lower(), video_id)
2904
2905     def in_download_archive(self, info_dict):
2906         fn = self.params.get('download_archive')
2907         if fn is None:
2908             return False
2909
2910         vid_id = self._make_archive_id(info_dict)
2911         if not vid_id:
2912             return False  # Incomplete video information
2913
2914         return vid_id in self.archive
2915
2916     def record_download_archive(self, info_dict):
2917         fn = self.params.get('download_archive')
2918         if fn is None:
2919             return
2920         vid_id = self._make_archive_id(info_dict)
2921         assert vid_id
2922         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2923             archive_file.write(vid_id + '\n')
2924         self.archive.add(vid_id)
2925
2926     @staticmethod
2927     def format_resolution(format, default='unknown'):
2928         if format.get('vcodec') == 'none':
2929             if format.get('acodec') == 'none':
2930                 return 'images'
2931             return 'audio only'
2932         if format.get('resolution') is not None:
2933             return format['resolution']
2934         if format.get('width') and format.get('height'):
2935             res = '%dx%d' % (format['width'], format['height'])
2936         elif format.get('height'):
2937             res = '%sp' % format['height']
2938         elif format.get('width'):
2939             res = '%dx?' % format['width']
2940         else:
2941             res = default
2942         return res
2943
2944     def _format_note(self, fdict):
2945         res = ''
2946         if fdict.get('ext') in ['f4f', 'f4m']:
2947             res += '(unsupported) '
2948         if fdict.get('language'):
2949             if res:
2950                 res += ' '
2951             res += '[%s] ' % fdict['language']
2952         if fdict.get('format_note') is not None:
2953             res += fdict['format_note'] + ' '
2954         if fdict.get('tbr') is not None:
2955             res += '%4dk ' % fdict['tbr']
2956         if fdict.get('container') is not None:
2957             if res:
2958                 res += ', '
2959             res += '%s container' % fdict['container']
2960         if (fdict.get('vcodec') is not None
2961                 and fdict.get('vcodec') != 'none'):
2962             if res:
2963                 res += ', '
2964             res += fdict['vcodec']
2965             if fdict.get('vbr') is not None:
2966                 res += '@'
2967         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2968             res += 'video@'
2969         if fdict.get('vbr') is not None:
2970             res += '%4dk' % fdict['vbr']
2971         if fdict.get('fps') is not None:
2972             if res:
2973                 res += ', '
2974             res += '%sfps' % fdict['fps']
2975         if fdict.get('acodec') is not None:
2976             if res:
2977                 res += ', '
2978             if fdict['acodec'] == 'none':
2979                 res += 'video only'
2980             else:
2981                 res += '%-5s' % fdict['acodec']
2982         elif fdict.get('abr') is not None:
2983             if res:
2984                 res += ', '
2985             res += 'audio'
2986         if fdict.get('abr') is not None:
2987             res += '@%3dk' % fdict['abr']
2988         if fdict.get('asr') is not None:
2989             res += ' (%5dHz)' % fdict['asr']
2990         if fdict.get('filesize') is not None:
2991             if res:
2992                 res += ', '
2993             res += format_bytes(fdict['filesize'])
2994         elif fdict.get('filesize_approx') is not None:
2995             if res:
2996                 res += ', '
2997             res += '~' + format_bytes(fdict['filesize_approx'])
2998         return res
2999
3000     def _format_note_table(self, f):
3001         def join_fields(*vargs):
3002             return ', '.join((val for val in vargs if val != ''))
3003
3004         return join_fields(
3005             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3006             format_field(f, 'language', '[%s]'),
3007             format_field(f, 'format_note'),
3008             format_field(f, 'container', ignore=(None, f.get('ext'))),
3009             format_field(f, 'asr', '%5dHz'))
3010
3011     def list_formats(self, info_dict):
3012         formats = info_dict.get('formats', [info_dict])
3013         new_format = (
3014             'list-formats' not in self.params.get('compat_opts', [])
3015             and self.params.get('listformats_table', True) is not False)
3016         if new_format:
3017             table = [
3018                 [
3019                     format_field(f, 'format_id'),
3020                     format_field(f, 'ext'),
3021                     self.format_resolution(f),
3022                     format_field(f, 'fps', '%d'),
3023                     '|',
3024                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3025                     format_field(f, 'tbr', '%4dk'),
3026                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3027                     '|',
3028                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3029                     format_field(f, 'vbr', '%4dk'),
3030                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3031                     format_field(f, 'abr', '%3dk'),
3032                     format_field(f, 'asr', '%5dHz'),
3033                     self._format_note_table(f)]
3034                 for f in formats
3035                 if f.get('preference') is None or f['preference'] >= -1000]
3036             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
3037                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
3038         else:
3039             table = [
3040                 [
3041                     format_field(f, 'format_id'),
3042                     format_field(f, 'ext'),
3043                     self.format_resolution(f),
3044                     self._format_note(f)]
3045                 for f in formats
3046                 if f.get('preference') is None or f['preference'] >= -1000]
3047             header_line = ['format code', 'extension', 'resolution', 'note']
3048
3049         self.to_screen(
3050             '[info] Available formats for %s:' % info_dict['id'])
3051         self.to_stdout(render_table(
3052             header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3053
3054     def list_thumbnails(self, info_dict):
3055         thumbnails = list(info_dict.get('thumbnails'))
3056         if not thumbnails:
3057             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3058             return
3059
3060         self.to_screen(
3061             '[info] Thumbnails for %s:' % info_dict['id'])
3062         self.to_stdout(render_table(
3063             ['ID', 'width', 'height', 'URL'],
3064             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3065
3066     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3067         if not subtitles:
3068             self.to_screen('%s has no %s' % (video_id, name))
3069             return
3070         self.to_screen(
3071             'Available %s for %s:' % (name, video_id))
3072
3073         def _row(lang, formats):
3074             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3075             if len(set(names)) == 1:
3076                 names = [] if names[0] == 'unknown' else names[:1]
3077             return [lang, ', '.join(names), ', '.join(exts)]
3078
3079         self.to_stdout(render_table(
3080             ['Language', 'Name', 'Formats'],
3081             [_row(lang, formats) for lang, formats in subtitles.items()],
3082             hideEmpty=True))
3083
3084     def urlopen(self, req):
3085         """ Start an HTTP download """
3086         if isinstance(req, compat_basestring):
3087             req = sanitized_Request(req)
3088         return self._opener.open(req, timeout=self._socket_timeout)
3089
3090     def print_debug_header(self):
3091         if not self.params.get('verbose'):
3092             return
3093
3094         if type('') is not compat_str:
3095             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
3096             self.report_warning(
3097                 'Your Python is broken! Update to a newer and supported version')
3098
3099         stdout_encoding = getattr(
3100             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
3101         encoding_str = (
3102             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3103                 locale.getpreferredencoding(),
3104                 sys.getfilesystemencoding(),
3105                 stdout_encoding,
3106                 self.get_encoding()))
3107         write_string(encoding_str, encoding=None)
3108
3109         source = (
3110             '(exe)' if hasattr(sys, 'frozen')
3111             else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
3112             else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
3113             else '')
3114         self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
3115         if _LAZY_LOADER:
3116             self._write_string('[debug] Lazy loading extractors enabled\n')
3117         if _PLUGIN_CLASSES:
3118             self._write_string(
3119                 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
3120         if self.params.get('compat_opts'):
3121             self._write_string(
3122                 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3123         try:
3124             sp = subprocess.Popen(
3125                 ['git', 'rev-parse', '--short', 'HEAD'],
3126                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3127                 cwd=os.path.dirname(os.path.abspath(__file__)))
3128             out, err = process_communicate_or_kill(sp)
3129             out = out.decode().strip()
3130             if re.match('[0-9a-f]+', out):
3131                 self._write_string('[debug] Git HEAD: %s\n' % out)
3132         except Exception:
3133             try:
3134                 sys.exc_clear()
3135             except Exception:
3136                 pass
3137
3138         def python_implementation():
3139             impl_name = platform.python_implementation()
3140             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3141                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3142             return impl_name
3143
3144         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3145             platform.python_version(),
3146             python_implementation(),
3147             platform.architecture()[0],
3148             platform_name()))
3149
3150         exe_versions = FFmpegPostProcessor.get_versions(self)
3151         exe_versions['rtmpdump'] = rtmpdump_version()
3152         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3153         exe_str = ', '.join(
3154             '%s %s' % (exe, v)
3155             for exe, v in sorted(exe_versions.items())
3156             if v
3157         )
3158         if not exe_str:
3159             exe_str = 'none'
3160         self._write_string('[debug] exe versions: %s\n' % exe_str)
3161
3162         proxy_map = {}
3163         for handler in self._opener.handlers:
3164             if hasattr(handler, 'proxies'):
3165                 proxy_map.update(handler.proxies)
3166         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
3167
3168         if self.params.get('call_home', False):
3169             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3170             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
3171             return
3172             latest_version = self.urlopen(
3173                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3174             if version_tuple(latest_version) > version_tuple(__version__):
3175                 self.report_warning(
3176                     'You are using an outdated version (newest version: %s)! '
3177                     'See https://yt-dl.org/update if you need help updating.' %
3178                     latest_version)
3179
3180     def _setup_opener(self):
3181         timeout_val = self.params.get('socket_timeout')
3182         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3183
3184         opts_cookiefile = self.params.get('cookiefile')
3185         opts_proxy = self.params.get('proxy')
3186
3187         if opts_cookiefile is None:
3188             self.cookiejar = compat_cookiejar.CookieJar()
3189         else:
3190             opts_cookiefile = expand_path(opts_cookiefile)
3191             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
3192             if os.access(opts_cookiefile, os.R_OK):
3193                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
3194
3195         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3196         if opts_proxy is not None:
3197             if opts_proxy == '':
3198                 proxies = {}
3199             else:
3200                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3201         else:
3202             proxies = compat_urllib_request.getproxies()
3203             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3204             if 'http' in proxies and 'https' not in proxies:
3205                 proxies['https'] = proxies['http']
3206         proxy_handler = PerRequestProxyHandler(proxies)
3207
3208         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3209         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3210         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3211         redirect_handler = YoutubeDLRedirectHandler()
3212         data_handler = compat_urllib_request_DataHandler()
3213
3214         # When passing our own FileHandler instance, build_opener won't add the
3215         # default FileHandler and allows us to disable the file protocol, which
3216         # can be used for malicious purposes (see
3217         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3218         file_handler = compat_urllib_request.FileHandler()
3219
3220         def file_open(*args, **kwargs):
3221             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3222         file_handler.file_open = file_open
3223
3224         opener = compat_urllib_request.build_opener(
3225             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3226
3227         # Delete the default user-agent header, which would otherwise apply in
3228         # cases where our custom HTTP handler doesn't come into play
3229         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3230         opener.addheaders = []
3231         self._opener = opener
3232
3233     def encode(self, s):
3234         if isinstance(s, bytes):
3235             return s  # Already encoded
3236
3237         try:
3238             return s.encode(self.get_encoding())
3239         except UnicodeEncodeError as err:
3240             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3241             raise
3242
3243     def get_encoding(self):
3244         encoding = self.params.get('encoding')
3245         if encoding is None:
3246             encoding = preferredencoding()
3247         return encoding
3248
3249     def _write_thumbnails(self, info_dict, filename):  # return the extensions
3250         write_all = self.params.get('write_all_thumbnails', False)
3251         thumbnails = []
3252         if write_all or self.params.get('writethumbnail', False):
3253             thumbnails = info_dict.get('thumbnails') or []
3254         multiple = write_all and len(thumbnails) > 1
3255
3256         ret = []
3257         for t in thumbnails[::-1]:
3258             thumb_ext = determine_ext(t['url'], 'jpg')
3259             suffix = '%s.' % t['id'] if multiple else ''
3260             thumb_display_id = '%s ' % t['id'] if multiple else ''
3261             thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3262
3263             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3264                 ret.append(suffix + thumb_ext)
3265                 t['filepath'] = thumb_filename
3266                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3267                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3268             else:
3269                 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3270                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3271                 try:
3272                     uf = self.urlopen(t['url'])
3273                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3274                         shutil.copyfileobj(uf, thumbf)
3275                     ret.append(suffix + thumb_ext)
3276                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3277                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3278                     t['filepath'] = thumb_filename
3279                 except network_exceptions as err:
3280                     self.report_warning('Unable to download thumbnail "%s": %s' %
3281                                         (t['url'], error_to_compat_str(err)))
3282             if ret and not write_all:
3283                 break
3284         return ret