]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
494c0d33b877650faa109d4f0e7e701f6310c0a1
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import tempfile
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30 from zipimport import zipimporter
31
32 from .compat import (
33 compat_basestring,
34 compat_get_terminal_size,
35 compat_kwargs,
36 compat_numeric_types,
37 compat_os_name,
38 compat_str,
39 compat_tokenize_tokenize,
40 compat_urllib_error,
41 compat_urllib_request,
42 compat_urllib_request_DataHandler,
43 )
44 from .cookies import load_cookies
45 from .utils import (
46 age_restricted,
47 args_to_str,
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
51 DEFAULT_OUTTMPL,
52 determine_ext,
53 determine_protocol,
54 DOT_DESKTOP_LINK_TEMPLATE,
55 DOT_URL_LINK_TEMPLATE,
56 DOT_WEBLOC_LINK_TEMPLATE,
57 DownloadError,
58 encode_compat_str,
59 encodeFilename,
60 EntryNotInPlaylist,
61 error_to_compat_str,
62 ExistingVideoReached,
63 expand_path,
64 ExtractorError,
65 float_or_none,
66 format_bytes,
67 format_field,
68 STR_FORMAT_RE,
69 formatSeconds,
70 GeoRestrictedError,
71 HEADRequest,
72 int_or_none,
73 iri_to_uri,
74 ISO3166Utils,
75 LazyList,
76 locked_file,
77 make_dir,
78 make_HTTPS_handler,
79 MaxDownloadsReached,
80 network_exceptions,
81 orderedSet,
82 OUTTMPL_TYPES,
83 PagedList,
84 parse_filesize,
85 PerRequestProxyHandler,
86 platform_name,
87 PostProcessingError,
88 preferredencoding,
89 prepend_extension,
90 process_communicate_or_kill,
91 register_socks_protocols,
92 RejectedVideoReached,
93 render_table,
94 replace_extension,
95 SameFileError,
96 sanitize_filename,
97 sanitize_path,
98 sanitize_url,
99 sanitized_Request,
100 std_headers,
101 str_or_none,
102 strftime_or_none,
103 subtitles_filename,
104 ThrottledDownload,
105 to_high_limit_path,
106 traverse_obj,
107 try_get,
108 UnavailableVideoError,
109 url_basename,
110 version_tuple,
111 write_json_file,
112 write_string,
113 YoutubeDLCookieProcessor,
114 YoutubeDLHandler,
115 YoutubeDLRedirectHandler,
116 )
117 from .cache import Cache
118 from .extractor import (
119 gen_extractor_classes,
120 get_info_extractor,
121 _LAZY_LOADER,
122 _PLUGIN_CLASSES
123 )
124 from .extractor.openload import PhantomJSwrapper
125 from .downloader import (
126 get_suitable_downloader,
127 shorten_protocol_name
128 )
129 from .downloader.rtmp import rtmpdump_version
130 from .postprocessor import (
131 get_postprocessor,
132 FFmpegFixupDurationPP,
133 FFmpegFixupM3u8PP,
134 FFmpegFixupM4aPP,
135 FFmpegFixupStretchedPP,
136 FFmpegFixupTimestampPP,
137 FFmpegMergerPP,
138 FFmpegPostProcessor,
139 MoveFilesAfterDownloadPP,
140 )
141 from .version import __version__
142
143 if compat_os_name == 'nt':
144 import ctypes
145
146
147 class YoutubeDL(object):
148 """YoutubeDL class.
149
150 YoutubeDL objects are the ones responsible of downloading the
151 actual video file and writing it to disk if the user has requested
152 it, among some other tasks. In most cases there should be one per
153 program. As, given a video URL, the downloader doesn't know how to
154 extract all the needed information, task that InfoExtractors do, it
155 has to pass the URL to one of them.
156
157 For this, YoutubeDL objects have a method that allows
158 InfoExtractors to be registered in a given order. When it is passed
159 a URL, the YoutubeDL object handles it to the first InfoExtractor it
160 finds that reports being able to handle it. The InfoExtractor extracts
161 all the information about the video or videos the URL refers to, and
162 YoutubeDL process the extracted information, possibly using a File
163 Downloader to download the video.
164
165 YoutubeDL objects accept a lot of parameters. In order not to saturate
166 the object constructor with arguments, it receives a dictionary of
167 options instead. These options are available through the params
168 attribute for the InfoExtractors to use. The YoutubeDL also
169 registers itself as the downloader in charge for the InfoExtractors
170 that are added to it, so this is a "mutual registration".
171
172 Available options:
173
174 username: Username for authentication purposes.
175 password: Password for authentication purposes.
176 videopassword: Password for accessing a video.
177 ap_mso: Adobe Pass multiple-system operator identifier.
178 ap_username: Multiple-system operator account username.
179 ap_password: Multiple-system operator account password.
180 usenetrc: Use netrc for authentication instead.
181 verbose: Print additional info to stdout.
182 quiet: Do not print messages to stdout.
183 no_warnings: Do not print out anything for warnings.
184 forceprint: A list of templates to force print
185 forceurl: Force printing final URL. (Deprecated)
186 forcetitle: Force printing title. (Deprecated)
187 forceid: Force printing ID. (Deprecated)
188 forcethumbnail: Force printing thumbnail URL. (Deprecated)
189 forcedescription: Force printing description. (Deprecated)
190 forcefilename: Force printing final filename. (Deprecated)
191 forceduration: Force printing duration. (Deprecated)
192 forcejson: Force printing info_dict as JSON.
193 dump_single_json: Force printing the info_dict of the whole playlist
194 (or video) as a single JSON line.
195 force_write_download_archive: Force writing download archive regardless
196 of 'skip_download' or 'simulate'.
197 simulate: Do not download the video files.
198 format: Video format code. see "FORMAT SELECTION" for more details.
199 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
200            ignore_no_formats_error: Ignore "No video formats" error. Useful for
201 extracting metadata even if the video is not actually
202 available for download (experimental)
203 format_sort: How to sort the video formats. see "Sorting Formats"
204 for more details.
205 format_sort_force: Force the given format_sort. see "Sorting Formats"
206 for more details.
207 allow_multiple_video_streams: Allow multiple video streams to be merged
208 into a single file
209 allow_multiple_audio_streams: Allow multiple audio streams to be merged
210 into a single file
211 check_formats Whether to test if the formats are downloadable.
212 Can be True (check all), False (check none)
213 or None (check only if requested by extractor)
214 paths: Dictionary of output paths. The allowed keys are 'home'
215 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
216 outtmpl: Dictionary of templates for output names. Allowed keys
217 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
218                        A string is also accepted for backward compatibility
219 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
220 restrictfilenames: Do not allow "&" and spaces in file names
221 trim_file_name: Limit length of filename (extension excluded)
222 windowsfilenames: Force the filenames to be windows compatible
223 ignoreerrors: Do not stop on download errors
224 (Default True when running yt-dlp,
225 but False when directly accessing YoutubeDL class)
226 skip_playlist_after_errors: Number of allowed failures until the rest of
227 the playlist is skipped
228 force_generic_extractor: Force downloader to use the generic extractor
229 overwrites: Overwrite all video and metadata files if True,
230 overwrite only non-video files if None
231 and don't overwrite any file if False
232 playliststart: Playlist item to start at.
233 playlistend: Playlist item to end at.
234 playlist_items: Specific indices of playlist to download.
235 playlistreverse: Download playlist items in reverse order.
236 playlistrandom: Download playlist items in random order.
237 matchtitle: Download only matching titles.
238 rejecttitle: Reject downloads for matching titles.
239 logger: Log messages to a logging.Logger instance.
240 logtostderr: Log messages to stderr instead of stdout.
241 writedescription: Write the video description to a .description file
242 writeinfojson: Write the video description to a .info.json file
243 clean_infojson: Remove private fields from the infojson
244 writecomments: Extract video comments. This will not be written to disk
245 unless writeinfojson is also given
246 writeannotations: Write the video annotations to a .annotations.xml file
247 writethumbnail: Write the thumbnail image to a file
248 allow_playlist_files: Whether to write playlists' description, infojson etc
249 also to disk when using the 'write*' options
250 write_all_thumbnails: Write all thumbnail formats to files
251 writelink: Write an internet shortcut file, depending on the
252 current platform (.url/.webloc/.desktop)
253 writeurllink: Write a Windows internet shortcut file (.url)
254 writewebloclink: Write a macOS internet shortcut file (.webloc)
255 writedesktoplink: Write a Linux internet shortcut file (.desktop)
256 writesubtitles: Write the video subtitles to a file
257 writeautomaticsub: Write the automatically generated subtitles to a file
258    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
259 Downloads all the subtitles of the video
260 (requires writesubtitles or writeautomaticsub)
261 listsubtitles: Lists all available subtitles for the video
262 subtitlesformat: The format code for subtitles
263 subtitleslangs: List of languages of the subtitles to download (can be regex).
264 The list may contain "all" to refer to all the available
265 subtitles. The language can be prefixed with a "-" to
266 exclude it from the requested languages. Eg: ['all', '-live_chat']
267 keepvideo: Keep the video file after post-processing
268 daterange: A DateRange object, download only if the upload_date is in the range.
269 skip_download: Skip the actual download of the video file
270 cachedir: Location of the cache files in the filesystem.
271 False to disable filesystem cache.
272 noplaylist: Download single video instead of a playlist if in doubt.
273 age_limit: An integer representing the user's age in years.
274 Unsuitable videos for the given age are skipped.
275 min_views: An integer representing the minimum view count the video
276 must have in order to not be skipped.
277 Videos without view count information are always
278 downloaded. None for no limit.
279 max_views: An integer representing the maximum view count.
280 Videos that are more popular than that are not
281 downloaded.
282 Videos without view count information are always
283 downloaded. None for no limit.
284 download_archive: File name of a file where all downloads are recorded.
285 Videos already present in the file are not downloaded
286 again.
287 break_on_existing: Stop the download process after attempting to download a
288 file that is in the archive.
289 break_on_reject: Stop the download process when encountering a video that
290 has been filtered out.
291 cookiefile: File name where cookies should be read from and dumped to
292 cookiesfrombrowser: A tuple containing the name of the browser and the profile
293 name/path from where cookies are loaded.
294 Eg: ('chrome', ) or (vivaldi, 'default')
295 nocheckcertificate:Do not verify SSL certificates
296 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
297 At the moment, this is only supported by YouTube.
298 proxy: URL of the proxy server to use
299 geo_verification_proxy: URL of the proxy to use for IP address verification
300 on geo-restricted sites.
301 socket_timeout: Time to wait for unresponsive hosts, in seconds
302 bidi_workaround: Work around buggy terminals without bidirectional text
303                        support, using fribidi
304 debug_printtraffic:Print out sent and received HTTP traffic
305 include_ads: Download ads as well
306 default_search: Prepend this string if an input url is not valid.
307 'auto' for elaborate guessing
308 encoding: Use this encoding instead of the system-specified.
309 extract_flat: Do not resolve URLs, return the immediate result.
310 Pass in 'in_playlist' to only show this behavior for
311 playlist items.
312 postprocessors: A list of dictionaries, each with an entry
313 * key: The name of the postprocessor. See
314 yt_dlp/postprocessor/__init__.py for a list.
315 * when: When to run the postprocessor. Can be one of
316 pre_process|before_dl|post_process|after_move.
317 Assumed to be 'post_process' if not given
318 post_hooks: A list of functions that get called as the final step
319 for each video file, after all postprocessors have been
320 called. The filename will be passed as the only argument.
321 progress_hooks: A list of functions that get called on download
322 progress, with a dictionary with the entries
323 * status: One of "downloading", "error", or "finished".
324 Check this first and ignore unknown values.
325
326 If status is one of "downloading", or "finished", the
327 following properties may also be present:
328 * filename: The final filename (always present)
329 * tmpfilename: The filename we're currently writing to
330 * downloaded_bytes: Bytes on disk
331 * total_bytes: Size of the whole file, None if unknown
332 * total_bytes_estimate: Guess of the eventual file size,
333 None if unavailable.
334 * elapsed: The number of seconds since download started.
335 * eta: The estimated time in seconds, None if unknown
336 * speed: The download speed in bytes/second, None if
337 unknown
338 * fragment_index: The counter of the currently
339 downloaded video fragment.
340 * fragment_count: The number of fragments (= individual
341 files that will be merged)
342
343 Progress hooks are guaranteed to be called at least once
344 (with status "finished") if the download is successful.
345 merge_output_format: Extension to use when merging formats.
346 final_ext: Expected final extension; used to detect when the file was
347 already downloaded and converted. "merge_output_format" is
348 replaced by this extension when given
349 fixup: Automatically correct known faults of the file.
350 One of:
351 - "never": do nothing
352 - "warn": only emit a warning
353 - "detect_or_warn": check whether we can do anything
354 about it, warn otherwise (default)
355 source_address: Client-side IP address to bind to.
356 call_home: Boolean, true iff we are allowed to contact the
357 yt-dlp servers for debugging. (BROKEN)
358 sleep_interval_requests: Number of seconds to sleep between requests
359 during extraction
360 sleep_interval: Number of seconds to sleep before each download when
361 used alone or a lower bound of a range for randomized
362 sleep before each download (minimum possible number
363 of seconds to sleep) when used along with
364 max_sleep_interval.
365 max_sleep_interval:Upper bound of a range for randomized sleep before each
366 download (maximum possible number of seconds to sleep).
367 Must only be used along with sleep_interval.
368 Actual sleep time will be a random float from range
369 [sleep_interval; max_sleep_interval].
370 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
371 listformats: Print an overview of available video formats and exit.
372 list_thumbnails: Print a table of all thumbnails and exit.
373 match_filter: A function that gets called with the info_dict of
374 every video.
375 If it returns a message, the video is ignored.
376 If it returns None, the video is downloaded.
377 match_filter_func in utils.py is one example for this.
378 no_color: Do not emit color codes in output.
379 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
380 HTTP header
381 geo_bypass_country:
382 Two-letter ISO 3166-2 country code that will be used for
383 explicit geographic restriction bypassing via faking
384 X-Forwarded-For HTTP header
385 geo_bypass_ip_block:
386 IP range in CIDR notation that will be used similarly to
387 geo_bypass_country
388
389 The following options determine which downloader is picked:
390 external_downloader: A dictionary of protocol keys and the executable of the
391 external downloader to use for it. The allowed protocols
392 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
393 Set the value to 'native' to use the native downloader
394 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
395 or {'m3u8': 'ffmpeg'} instead.
396 Use the native HLS downloader instead of ffmpeg/avconv
397 if True, otherwise use ffmpeg/avconv if False, otherwise
398 use downloader suggested by extractor if None.
399 compat_opts: Compatibility options. See "Differences in default behavior".
400 The following options do not work when used through the API:
401 filename, abort-on-error, multistreams, no-live-chat,
402 no-playlist-metafiles. Refer __init__.py for their implementation
403
404 The following parameters are not used by YoutubeDL itself, they are used by
405 the downloader (see yt_dlp/downloader/common.py):
406 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
407 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
408 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
409
410 The following options are used by the post processors:
411 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
412 otherwise prefer ffmpeg. (avconv support is deprecated)
413 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
414 to the binary or its containing directory.
415 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
416 and a list of additional command-line arguments for the
417 postprocessor/executable. The dict can also have "PP+EXE" keys
418 which are used when the given exe is used by the given PP.
419 Use 'default' as the name for arguments to passed to all PP
420
421 The following options are used by the extractors:
422 extractor_retries: Number of times to retry for known errors
423 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
424 hls_split_discontinuity: Split HLS playlists to different formats at
425 discontinuities such as ad breaks (default: False)
426 extractor_args: A dictionary of arguments to be passed to the extractors.
427 See "EXTRACTOR ARGUMENTS" for details.
428 Eg: {'youtube': {'skip': ['dash', 'hls']}}
429 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
430 If True (default), DASH manifests and related
431 data will be downloaded and processed by extractor.
432 You can reduce network I/O by disabling it if you don't
433 care about DASH. (only for youtube)
434 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
435 If True (default), HLS manifests and related
436 data will be downloaded and processed by extractor.
437 You can reduce network I/O by disabling it if you don't
438 care about HLS. (only for youtube)
439 """
440
441 _NUMERIC_FIELDS = set((
442 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
443 'timestamp', 'upload_year', 'upload_month', 'upload_day',
444 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
445 'average_rating', 'comment_count', 'age_limit',
446 'start_time', 'end_time',
447 'chapter_number', 'season_number', 'episode_number',
448 'track_number', 'disc_number', 'release_year',
449 'playlist_index',
450 ))
451
452 params = None
453 _ies = []
454 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
455 _printed_messages = set()
456 _first_webpage_request = True
457 _download_retcode = None
458 _num_downloads = None
459 _playlist_level = 0
460 _playlist_urls = set()
461 _screen_file = None
462
463 def __init__(self, params=None, auto_init=True):
464 """Create a FileDownloader object with the given options."""
465 if params is None:
466 params = {}
467 self._ies = []
468 self._ies_instances = {}
469 self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
470 self._printed_messages = set()
471 self._first_webpage_request = True
472 self._post_hooks = []
473 self._progress_hooks = []
474 self._download_retcode = 0
475 self._num_downloads = 0
476 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
477 self._err_file = sys.stderr
478 self.params = {
479 # Default parameters
480 'nocheckcertificate': False,
481 }
482 self.params.update(params)
483 self.cache = Cache(self)
484
485 if sys.version_info < (3, 6):
486 self.report_warning(
487 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
488
489 def check_deprecated(param, option, suggestion):
490 if self.params.get(param) is not None:
491 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
492 return True
493 return False
494
495 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
496 if self.params.get('geo_verification_proxy') is None:
497 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
498
499 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
500 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
501 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
502
503 for msg in self.params.get('warnings', []):
504 self.report_warning(msg)
505
506 if self.params.get('final_ext'):
507 if self.params.get('merge_output_format'):
508 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
509 self.params['merge_output_format'] = self.params['final_ext']
510
511 if 'overwrites' in self.params and self.params['overwrites'] is None:
512 del self.params['overwrites']
513
514 if params.get('bidi_workaround', False):
515 try:
516 import pty
517 master, slave = pty.openpty()
518 width = compat_get_terminal_size().columns
519 if width is None:
520 width_args = []
521 else:
522 width_args = ['-w', str(width)]
523 sp_kwargs = dict(
524 stdin=subprocess.PIPE,
525 stdout=slave,
526 stderr=self._err_file)
527 try:
528 self._output_process = subprocess.Popen(
529 ['bidiv'] + width_args, **sp_kwargs
530 )
531 except OSError:
532 self._output_process = subprocess.Popen(
533 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
534 self._output_channel = os.fdopen(master, 'rb')
535 except OSError as ose:
536 if ose.errno == errno.ENOENT:
537 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
538 else:
539 raise
540
541 if (sys.platform != 'win32'
542 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
543 and not params.get('restrictfilenames', False)):
544 # Unicode filesystem API will throw errors (#1474, #13027)
545 self.report_warning(
546 'Assuming --restrict-filenames since file system encoding '
547 'cannot encode all characters. '
548 'Set the LC_ALL environment variable to fix this.')
549 self.params['restrictfilenames'] = True
550
551 self.outtmpl_dict = self.parse_outtmpl()
552
553 # Creating format selector here allows us to catch syntax errors before the extraction
554 self.format_selector = (
555 None if self.params.get('format') is None
556 else self.build_format_selector(self.params['format']))
557
558 self._setup_opener()
559
560 """Preload the archive, if any is specified"""
561 def preload_download_archive(fn):
562 if fn is None:
563 return False
564 self.write_debug('Loading archive file %r\n' % fn)
565 try:
566 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
567 for line in archive_file:
568 self.archive.add(line.strip())
569 except IOError as ioe:
570 if ioe.errno != errno.ENOENT:
571 raise
572 return False
573 return True
574
575 self.archive = set()
576 preload_download_archive(self.params.get('download_archive'))
577
578 if auto_init:
579 self.print_debug_header()
580 self.add_default_info_extractors()
581
582 for pp_def_raw in self.params.get('postprocessors', []):
583 pp_def = dict(pp_def_raw)
584 when = pp_def.pop('when', 'post_process')
585 pp_class = get_postprocessor(pp_def.pop('key'))
586 pp = pp_class(self, **compat_kwargs(pp_def))
587 self.add_post_processor(pp, when=when)
588
589 for ph in self.params.get('post_hooks', []):
590 self.add_post_hook(ph)
591
592 for ph in self.params.get('progress_hooks', []):
593 self.add_progress_hook(ph)
594
595 register_socks_protocols()
596
597 def warn_if_short_id(self, argv):
598 # short YouTube ID starting with dash?
599 idxs = [
600 i for i, a in enumerate(argv)
601 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
602 if idxs:
603 correct_argv = (
604 ['yt-dlp']
605 + [a for i, a in enumerate(argv) if i not in idxs]
606 + ['--'] + [argv[i] for i in idxs]
607 )
608 self.report_warning(
609 'Long argument string detected. '
610 'Use -- to separate parameters and URLs, like this:\n%s\n' %
611 args_to_str(correct_argv))
612
613 def add_info_extractor(self, ie):
614 """Add an InfoExtractor object to the end of the list."""
615 self._ies.append(ie)
616 if not isinstance(ie, type):
617 self._ies_instances[ie.ie_key()] = ie
618 ie.set_downloader(self)
619
620 def get_info_extractor(self, ie_key):
621 """
622 Get an instance of an IE with name ie_key, it will try to get one from
623 the _ies list, if there's no instance it will create a new one and add
624 it to the extractor list.
625 """
626 ie = self._ies_instances.get(ie_key)
627 if ie is None:
628 ie = get_info_extractor(ie_key)()
629 self.add_info_extractor(ie)
630 return ie
631
632 def add_default_info_extractors(self):
633 """
634 Add the InfoExtractors returned by gen_extractors to the end of the list
635 """
636 for ie in gen_extractor_classes():
637 self.add_info_extractor(ie)
638
639 def add_post_processor(self, pp, when='post_process'):
640 """Add a PostProcessor object to the end of the chain."""
641 self._pps[when].append(pp)
642 pp.set_downloader(self)
643
644 def add_post_hook(self, ph):
645 """Add the post hook"""
646 self._post_hooks.append(ph)
647
648 def add_progress_hook(self, ph):
649 """Add the progress hook (currently only for the file downloader)"""
650 self._progress_hooks.append(ph)
651
652 def _bidi_workaround(self, message):
653 if not hasattr(self, '_output_channel'):
654 return message
655
656 assert hasattr(self, '_output_process')
657 assert isinstance(message, compat_str)
658 line_count = message.count('\n') + 1
659 self._output_process.stdin.write((message + '\n').encode('utf-8'))
660 self._output_process.stdin.flush()
661 res = ''.join(self._output_channel.readline().decode('utf-8')
662 for _ in range(line_count))
663 return res[:-len('\n')]
664
665 def _write_string(self, message, out=None, only_once=False):
666 if only_once:
667 if message in self._printed_messages:
668 return
669 self._printed_messages.add(message)
670 write_string(message, out=out, encoding=self.params.get('encoding'))
671
672 def to_stdout(self, message, skip_eol=False, quiet=False):
673 """Print message to stdout"""
674 if self.params.get('logger'):
675 self.params['logger'].debug(message)
676 elif not quiet or self.params.get('verbose'):
677 self._write_string(
678 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
679 self._err_file if quiet else self._screen_file)
680
681 def to_stderr(self, message, only_once=False):
682 """Print message to stderr"""
683 assert isinstance(message, compat_str)
684 if self.params.get('logger'):
685 self.params['logger'].error(message)
686 else:
687 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
688
689 def to_console_title(self, message):
690 if not self.params.get('consoletitle', False):
691 return
692 if compat_os_name == 'nt':
693 if ctypes.windll.kernel32.GetConsoleWindow():
694 # c_wchar_p() might not be necessary if `message` is
695 # already of type unicode()
696 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
697 elif 'TERM' in os.environ:
698 self._write_string('\033]0;%s\007' % message, self._screen_file)
699
700 def save_console_title(self):
701 if not self.params.get('consoletitle', False):
702 return
703 if self.params.get('simulate', False):
704 return
705 if compat_os_name != 'nt' and 'TERM' in os.environ:
706 # Save the title on stack
707 self._write_string('\033[22;0t', self._screen_file)
708
709 def restore_console_title(self):
710 if not self.params.get('consoletitle', False):
711 return
712 if self.params.get('simulate', False):
713 return
714 if compat_os_name != 'nt' and 'TERM' in os.environ:
715 # Restore the title from stack
716 self._write_string('\033[23;0t', self._screen_file)
717
718 def __enter__(self):
719 self.save_console_title()
720 return self
721
722 def __exit__(self, *args):
723 self.restore_console_title()
724
725 if self.params.get('cookiefile') is not None:
726 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
727
728 def trouble(self, message=None, tb=None):
729 """Determine action to take when a download problem appears.
730
731 Depending on if the downloader has been configured to ignore
732 download errors or not, this method may throw an exception or
733 not when errors are found, after printing the message.
734
735 tb, if given, is additional traceback information.
736 """
737 if message is not None:
738 self.to_stderr(message)
739 if self.params.get('verbose'):
740 if tb is None:
741 if sys.exc_info()[0]: # if .trouble has been called from an except block
742 tb = ''
743 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
744 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
745 tb += encode_compat_str(traceback.format_exc())
746 else:
747 tb_data = traceback.format_list(traceback.extract_stack())
748 tb = ''.join(tb_data)
749 if tb:
750 self.to_stderr(tb)
751 if not self.params.get('ignoreerrors', False):
752 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
753 exc_info = sys.exc_info()[1].exc_info
754 else:
755 exc_info = sys.exc_info()
756 raise DownloadError(message, exc_info)
757 self._download_retcode = 1
758
759 def to_screen(self, message, skip_eol=False):
760 """Print message to stdout if not in quiet mode"""
761 self.to_stdout(
762 message, skip_eol, quiet=self.params.get('quiet', False))
763
764 def report_warning(self, message, only_once=False):
765 '''
766 Print the message to stderr, it will be prefixed with 'WARNING:'
767 If stderr is a tty file the 'WARNING:' will be colored
768 '''
769 if self.params.get('logger') is not None:
770 self.params['logger'].warning(message)
771 else:
772 if self.params.get('no_warnings'):
773 return
774 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
775 _msg_header = '\033[0;33mWARNING:\033[0m'
776 else:
777 _msg_header = 'WARNING:'
778 warning_message = '%s %s' % (_msg_header, message)
779 self.to_stderr(warning_message, only_once)
780
781 def report_error(self, message, tb=None):
782 '''
783 Do the same as trouble, but prefixes the message with 'ERROR:', colored
784 in red if stderr is a tty file.
785 '''
786 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
787 _msg_header = '\033[0;31mERROR:\033[0m'
788 else:
789 _msg_header = 'ERROR:'
790 error_message = '%s %s' % (_msg_header, message)
791 self.trouble(error_message, tb)
792
793 def write_debug(self, message, only_once=False):
794 '''Log debug message or Print message to stderr'''
795 if not self.params.get('verbose', False):
796 return
797 message = '[debug] %s' % message
798 if self.params.get('logger'):
799 self.params['logger'].debug(message)
800 else:
801 self.to_stderr(message, only_once)
802
803 def report_file_already_downloaded(self, file_name):
804 """Report file has already been fully downloaded."""
805 try:
806 self.to_screen('[download] %s has already been downloaded' % file_name)
807 except UnicodeEncodeError:
808 self.to_screen('[download] The file has already been downloaded')
809
810 def report_file_delete(self, file_name):
811 """Report that existing file will be deleted."""
812 try:
813 self.to_screen('Deleting existing file %s' % file_name)
814 except UnicodeEncodeError:
815 self.to_screen('Deleting existing file')
816
817 def parse_outtmpl(self):
818 outtmpl_dict = self.params.get('outtmpl', {})
819 if not isinstance(outtmpl_dict, dict):
820 outtmpl_dict = {'default': outtmpl_dict}
821 outtmpl_dict.update({
822 k: v for k, v in DEFAULT_OUTTMPL.items()
823 if not outtmpl_dict.get(k)})
824 for key, val in outtmpl_dict.items():
825 if isinstance(val, bytes):
826 self.report_warning(
827 'Parameter outtmpl is bytes, but should be a unicode string. '
828 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
829 return outtmpl_dict
830
831 def get_output_path(self, dir_type='', filename=None):
832 paths = self.params.get('paths', {})
833 assert isinstance(paths, dict)
834 path = os.path.join(
835 expand_path(paths.get('home', '').strip()),
836 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
837 filename or '')
838
839 # Temporary fix for #4787
840 # 'Treat' all problem characters by passing filename through preferredencoding
841 # to workaround encoding issues with subprocess on python2 @ Windows
842 if sys.version_info < (3, 0) and sys.platform == 'win32':
843 path = encodeFilename(path, True).decode(preferredencoding())
844 return sanitize_path(path, force=self.params.get('windowsfilenames'))
845
846 @staticmethod
847 def validate_outtmpl(tmpl):
848 ''' @return None or Exception object '''
849 try:
850 re.sub(
851 STR_FORMAT_RE.format(''),
852 lambda mobj: ('%' if not mobj.group('has_key') else '') + mobj.group(0),
853 tmpl
854 ) % collections.defaultdict(int)
855 return None
856 except ValueError as err:
857 return err
858
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
        # Work on a copy so the caller's info_dict is not polluted with the
        # synthetic fields added below
        info_dict = dict(info_dict)
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['epoch'] = int(time.time())
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Maps each mangled key (field-spec + '\0' + conversion) to its value
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Full internal key syntax: -field+offset>strf_format|default
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        # Dotted-path lookup into info_dict (string/int/slice sub-keys)
        get_key = lambda k: traverse_obj(
            info_dict, k.split('.'), is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Resolve a parsed field spec (groupdict of INTERNAL_FORMAT_RE),
            # applying negation, arithmetic and strftime formatting in turn
            # Object traversal
            value = get_key(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Alternately consume an operator and an operand until the
                # maths expression is exhausted
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Operand is not a number literal; treat it as a field name
                        offset = float_or_none(get_key(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'])

            return value

        def create_key(outer_mobj):
            # re.sub callback: rewrite each %(...)X to a mangled key and
            # record the resolved value in TMPL_DICT
            if not outer_mobj.group('has_key'):
                # A lone '%' conversion without a key -- escape it
                return '%{}'.format(outer_mobj.group(0))

            key = outer_mobj.group('key')
            fmt = outer_mobj.group('format')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            if mobj is None:
                value, default, mobj = None, na, {'fields': ''}
            else:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else na
                value = get_value(mobj)

            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                # Back-compat: pad playlist_index/autonumber to a fixed width
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            if fmt == 'c':
                value = compat_str(value)
                if value is None:
                    value, fmt = default, 's'
                else:
                    # %c takes a single character
                    value = value[0]
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'
            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), '%ss' % fmt[:-1]
                if fmt[-1] in 'csr':
                    value = sanitize(mobj['fields'].split('.')[-1], value)
            # '\0' cannot appear in a user template, so the mangled key is unique
            key += '\0%s' % fmt
            TMPL_DICT[key] = value
            return '%({key}){fmt}'.format(key=key, fmt=fmt)

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
979
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        """Expand the output template of the given type for info_dict.

        Returns the filename string, or None if template expansion failed
        (the error is reported via report_error).
        """
        try:
            # Field-aware sanitizer: id-like fields get the stricter is_id handling
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
            outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # Some template types (infojson, thumbnails, ...) force an extension
            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                # Truncate the stem but keep (sub)extensions intact
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
1020
1021 def prepare_filename(self, info_dict, dir_type='', warn=False):
1022 """Generate the output filename."""
1023
1024 filename = self._prepare_filename(info_dict, dir_type or 'default')
1025
1026 if warn:
1027 if not self.params.get('paths'):
1028 pass
1029 elif filename == '-':
1030 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1031 elif os.path.isabs(filename):
1032 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1033 self.__prepare_filename_warned = True
1034 if filename == '-' or not filename:
1035 return filename
1036
1037 return self.get_output_path(dir_type, filename)
1038
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """
        # Otherwise returns a human-readable reason string; may also raise
        # ExistingVideoReached/RejectedVideoReached when break_on_* is set

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            # Evaluate all user-configured filters; return a rejection
            # reason or None to accept the entry
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            if not incomplete:
                # match_filter functions only get a complete info_dict
                match_filter = self.params.get('match_filter')
                if match_filter is not None:
                    ret = match_filter(info_dict)
                    if ret is not None:
                        return ret
            return None

        # Archive check takes precedence over the other filters
        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                # Abort the whole run if the corresponding break option is set
                raise break_err()
        return reason
1092
1093 @staticmethod
1094 def add_extra_info(info_dict, extra_info):
1095 '''Set the keys from extra_info in info dict if they are missing'''
1096 for key, value in extra_info.items():
1097 info_dict.setdefault(key, value)
1098
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """
        # NOTE(review): extra_info has a mutable default and is mutated
        # downstream (process_ie_result calls setdefault on it) -- confirm
        # this sharing is intentional

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            # Try all registered extractors in order
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                # Try to get the video id cheaply, without a full extraction,
                # so already-archived videos can be skipped early
                temp_id = str_or_none(
                    ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                    else ie._match_id(url))
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                # break (not return) so the for-else error below is skipped
                break
            return self.__extract_info(url, ie, download, extra_info, process)
        else:
            # No extractor claimed the URL
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1147
    def __handle_extraction_exceptions(func, handle_all_errors=True):
        # Decorator applied inside the class body (hence no `self` argument):
        # converts common extraction exceptions into error reports so that a
        # single failed video does not abort the whole run
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload:
                # '\r' erases the partial progress line before the warning
                self.to_stderr('\r')
                self.report_warning('The download speed is below throttle limit. Re-extracting data')
                # Retry by re-entering the wrapper (unbounded retries)
                return wrapper(self, *args, **kwargs)
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
                # These control the outer download loop and must propagate
                raise
            except Exception as e:
                if handle_all_errors and self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
1173
    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        # Run the actual extraction with `ie` and hand the result over to
        # process_ie_result (unless process=False)
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            # Preserve the URL the user originally supplied across redirects
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result
1192
1193 def add_default_extra_info(self, ie_result, ie, url):
1194 if url is not None:
1195 self.add_extra_info(ie_result, {
1196 'webpage_url': url,
1197 'original_url': url,
1198 'webpage_url_basename': url_basename(url),
1199 })
1200 if ie is not None:
1201 self.add_extra_info(ie_result, {
1202 'extractor': ie.IE_NAME,
1203 'extractor_key': ie.ie_key(),
1204 })
1205
1206 def process_ie_result(self, ie_result, download=True, extra_info={}):
1207 """
1208 Take the result of the ie(may be modified) and resolve all unresolved
1209 references (URLs, playlist items).
1210
1211 It will also download the videos if 'download'.
1212 Returns the resolved ie_result.
1213 """
1214 result_type = ie_result.get('_type', 'video')
1215
1216 if result_type in ('url', 'url_transparent'):
1217 ie_result['url'] = sanitize_url(ie_result['url'])
1218 if ie_result.get('original_url'):
1219 extra_info.setdefault('original_url', ie_result['original_url'])
1220
1221 extract_flat = self.params.get('extract_flat', False)
1222 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1223 or extract_flat is True):
1224 info_copy = ie_result.copy()
1225 self.add_extra_info(info_copy, extra_info)
1226 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1227 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1228 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1229 return ie_result
1230
1231 if result_type == 'video':
1232 self.add_extra_info(ie_result, extra_info)
1233 ie_result = self.process_video_result(ie_result, download=download)
1234 additional_urls = (ie_result or {}).get('additional_urls')
1235 if additional_urls:
1236 # TODO: Improve MetadataFromFieldPP to allow setting a list
1237 if isinstance(additional_urls, compat_str):
1238 additional_urls = [additional_urls]
1239 self.to_screen(
1240 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1241 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1242 ie_result['additional_entries'] = [
1243 self.extract_info(
1244 url, download, extra_info,
1245 force_generic_extractor=self.params.get('force_generic_extractor'))
1246 for url in additional_urls
1247 ]
1248 return ie_result
1249 elif result_type == 'url':
1250 # We have to add extra_info to the results because it may be
1251 # contained in a playlist
1252 return self.extract_info(
1253 ie_result['url'], download,
1254 ie_key=ie_result.get('ie_key'),
1255 extra_info=extra_info)
1256 elif result_type == 'url_transparent':
1257 # Use the information from the embedding page
1258 info = self.extract_info(
1259 ie_result['url'], ie_key=ie_result.get('ie_key'),
1260 extra_info=extra_info, download=False, process=False)
1261
1262 # extract_info may return None when ignoreerrors is enabled and
1263 # extraction failed with an error, don't crash and return early
1264 # in this case
1265 if not info:
1266 return info
1267
1268 force_properties = dict(
1269 (k, v) for k, v in ie_result.items() if v is not None)
1270 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1271 if f in force_properties:
1272 del force_properties[f]
1273 new_result = info.copy()
1274 new_result.update(force_properties)
1275
1276 # Extracted info may not be a video result (i.e.
1277 # info.get('_type', 'video') != video) but rather an url or
1278 # url_transparent. In such cases outer metadata (from ie_result)
1279 # should be propagated to inner one (info). For this to happen
1280 # _type of info should be overridden with url_transparent. This
1281 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1282 if new_result.get('_type') == 'url':
1283 new_result['_type'] = 'url_transparent'
1284
1285 return self.process_ie_result(
1286 new_result, download=download, extra_info=extra_info)
1287 elif result_type in ('playlist', 'multi_video'):
1288 # Protect from infinite recursion due to recursively nested playlists
1289 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1290 webpage_url = ie_result['webpage_url']
1291 if webpage_url in self._playlist_urls:
1292 self.to_screen(
1293 '[download] Skipping already downloaded playlist: %s'
1294 % ie_result.get('title') or ie_result.get('id'))
1295 return
1296
1297 self._playlist_level += 1
1298 self._playlist_urls.add(webpage_url)
1299 self._sanitize_thumbnails(ie_result)
1300 try:
1301 return self.__process_playlist(ie_result, download)
1302 finally:
1303 self._playlist_level -= 1
1304 if not self._playlist_level:
1305 self._playlist_urls.clear()
1306 elif result_type == 'compat_list':
1307 self.report_warning(
1308 'Extractor %s returned a compat_list result. '
1309 'It needs to be updated.' % ie_result.get('extractor'))
1310
1311 def _fixup(r):
1312 self.add_extra_info(
1313 r,
1314 {
1315 'extractor': ie_result['extractor'],
1316 'webpage_url': ie_result['webpage_url'],
1317 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1318 'extractor_key': ie_result['extractor_key'],
1319 }
1320 )
1321 return r
1322 ie_result['entries'] = [
1323 self.process_ie_result(_fixup(r), download, extra_info)
1324 for r in ie_result['entries']
1325 ]
1326 return ie_result
1327 else:
1328 raise Exception('Invalid result type: %s' % result_type)
1329
    def _ensure_dir_exists(self, path):
        # Create the directory for `path` if needed, reporting any failure
        # through self.report_error
        # NOTE(review): exact return semantics come from utils.make_dir -- confirm
        return make_dir(path, self.report_error)
1332
1333 def __process_playlist(self, ie_result, download):
1334 # We process each entry in the playlist
1335 playlist = ie_result.get('title') or ie_result.get('id')
1336 self.to_screen('[download] Downloading playlist: %s' % playlist)
1337
1338 if 'entries' not in ie_result:
1339 raise EntryNotInPlaylist()
1340 incomplete_entries = bool(ie_result.get('requested_entries'))
1341 if incomplete_entries:
1342 def fill_missing_entries(entries, indexes):
1343 ret = [None] * max(*indexes)
1344 for i, entry in zip(indexes, entries):
1345 ret[i - 1] = entry
1346 return ret
1347 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1348
1349 playlist_results = []
1350
1351 playliststart = self.params.get('playliststart', 1)
1352 playlistend = self.params.get('playlistend')
1353 # For backwards compatibility, interpret -1 as whole list
1354 if playlistend == -1:
1355 playlistend = None
1356
1357 playlistitems_str = self.params.get('playlist_items')
1358 playlistitems = None
1359 if playlistitems_str is not None:
1360 def iter_playlistitems(format):
1361 for string_segment in format.split(','):
1362 if '-' in string_segment:
1363 start, end = string_segment.split('-')
1364 for item in range(int(start), int(end) + 1):
1365 yield int(item)
1366 else:
1367 yield int(string_segment)
1368 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1369
1370 ie_entries = ie_result['entries']
1371 msg = (
1372 'Downloading %d videos' if not isinstance(ie_entries, list)
1373 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1374 if not isinstance(ie_entries, (list, PagedList)):
1375 ie_entries = LazyList(ie_entries)
1376
1377 def get_entry(i):
1378 return YoutubeDL.__handle_extraction_exceptions(
1379 lambda self, i: ie_entries[i - 1],
1380 False
1381 )(self, i)
1382
1383 entries = []
1384 for i in playlistitems or itertools.count(playliststart):
1385 if playlistitems is None and playlistend is not None and playlistend < i:
1386 break
1387 entry = None
1388 try:
1389 entry = get_entry(i)
1390 if entry is None:
1391 raise EntryNotInPlaylist()
1392 except (IndexError, EntryNotInPlaylist):
1393 if incomplete_entries:
1394 raise EntryNotInPlaylist()
1395 elif not playlistitems:
1396 break
1397 entries.append(entry)
1398 try:
1399 if entry is not None:
1400 self._match_entry(entry, incomplete=True, silent=True)
1401 except (ExistingVideoReached, RejectedVideoReached):
1402 break
1403 ie_result['entries'] = entries
1404
1405 # Save playlist_index before re-ordering
1406 entries = [
1407 ((playlistitems[i - 1] if playlistitems else i), entry)
1408 for i, entry in enumerate(entries, 1)
1409 if entry is not None]
1410 n_entries = len(entries)
1411
1412 if not playlistitems and (playliststart or playlistend):
1413 playlistitems = list(range(playliststart, playliststart + n_entries))
1414 ie_result['requested_entries'] = playlistitems
1415
1416 if self.params.get('allow_playlist_files', True):
1417 ie_copy = {
1418 'playlist': playlist,
1419 'playlist_id': ie_result.get('id'),
1420 'playlist_title': ie_result.get('title'),
1421 'playlist_uploader': ie_result.get('uploader'),
1422 'playlist_uploader_id': ie_result.get('uploader_id'),
1423 'playlist_index': 0,
1424 }
1425 ie_copy.update(dict(ie_result))
1426
1427 if self.params.get('writeinfojson', False):
1428 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1429 if not self._ensure_dir_exists(encodeFilename(infofn)):
1430 return
1431 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1432 self.to_screen('[info] Playlist metadata is already present')
1433 else:
1434 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1435 try:
1436 write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1437 except (OSError, IOError):
1438 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1439
1440 # TODO: This should be passed to ThumbnailsConvertor if necessary
1441 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1442
1443 if self.params.get('writedescription', False):
1444 descfn = self.prepare_filename(ie_copy, 'pl_description')
1445 if not self._ensure_dir_exists(encodeFilename(descfn)):
1446 return
1447 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1448 self.to_screen('[info] Playlist description is already present')
1449 elif ie_result.get('description') is None:
1450 self.report_warning('There\'s no playlist description to write.')
1451 else:
1452 try:
1453 self.to_screen('[info] Writing playlist description to: ' + descfn)
1454 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1455 descfile.write(ie_result['description'])
1456 except (OSError, IOError):
1457 self.report_error('Cannot write playlist description file ' + descfn)
1458 return
1459
1460 if self.params.get('playlistreverse', False):
1461 entries = entries[::-1]
1462 if self.params.get('playlistrandom', False):
1463 random.shuffle(entries)
1464
1465 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1466
1467 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1468 failures = 0
1469 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1470 for i, entry_tuple in enumerate(entries, 1):
1471 playlist_index, entry = entry_tuple
1472 if 'playlist_index' in self.params.get('compat_options', []):
1473 playlist_index = playlistitems[i - 1] if playlistitems else i
1474 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1475 # This __x_forwarded_for_ip thing is a bit ugly but requires
1476 # minimal changes
1477 if x_forwarded_for:
1478 entry['__x_forwarded_for_ip'] = x_forwarded_for
1479 extra = {
1480 'n_entries': n_entries,
1481 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1482 'playlist_index': playlist_index,
1483 'playlist_autonumber': i,
1484 'playlist': playlist,
1485 'playlist_id': ie_result.get('id'),
1486 'playlist_title': ie_result.get('title'),
1487 'playlist_uploader': ie_result.get('uploader'),
1488 'playlist_uploader_id': ie_result.get('uploader_id'),
1489 'extractor': ie_result['extractor'],
1490 'webpage_url': ie_result['webpage_url'],
1491 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1492 'extractor_key': ie_result['extractor_key'],
1493 }
1494
1495 if self._match_entry(entry, incomplete=True) is not None:
1496 continue
1497
1498 entry_result = self.__process_iterable_entry(entry, download, extra)
1499 if not entry_result:
1500 failures += 1
1501 if failures >= max_failures:
1502 self.report_error(
1503 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1504 break
1505 # TODO: skip failed (empty) entries?
1506 playlist_results.append(entry_result)
1507 ie_result['entries'] = playlist_results
1508 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1509 return ie_result
1510
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        # Thin wrapper so that errors in one playlist entry are handled (and,
        # with ignoreerrors, skipped) by __handle_extraction_exceptions
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1515
    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "
        # Two grammars are tried in order: numeric comparisons on known keys
        # (e.g. 'height>=720'), then string comparisons (e.g. 'vcodec^=avc1')

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.fullmatch(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                # Not a plain integer; try to parse as a size (e.g. '500K')
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)\s*
                (?P<key>[a-zA-Z0-9._-]+)\s*
                (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
                (?P<value>[a-zA-Z0-9._-]+)\s*
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.fullmatch(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    # A leading '!' inverts the string operator
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            # Closure over m/op/comparison_value; applied per format dict.
            # A trailing '?' in the spec (none_inclusive) keeps formats that
            # lack the key
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter
1576
1577 def _default_format_spec(self, info_dict, download=True):
1578
1579 def can_merge():
1580 merger = FFmpegMergerPP(self)
1581 return merger.available and merger.can_merge()
1582
1583 prefer_best = (
1584 not self.params.get('simulate', False)
1585 and download
1586 and (
1587 not can_merge()
1588 or info_dict.get('is_live', False)
1589 or self.outtmpl_dict['default'] == '-'))
1590 compat = (
1591 prefer_best
1592 or self.params.get('allow_multiple_audio_streams', False)
1593 or 'format-spec' in self.params.get('compat_opts', []))
1594
1595 return (
1596 'best/bestvideo+bestaudio' if prefer_best
1597 else 'bestvideo*+bestaudio/best' if not compat
1598 else 'bestvideo+bestaudio/best')
1599
    def build_format_selector(self, format_spec):
        """Compile the format selection expression *format_spec*.

        Returns a function that takes a ``ctx`` dict (keys: ``formats``,
        ``incomplete_formats``) and yields the selected format dicts
        (synthetic merged dicts for ``+`` expressions). Raises SyntaxError
        for an invalid *format_spec*.
        """
        def syntax_error(note, start):
            # Builds (does not raise) a SyntaxError whose message points a
            # caret at column start[1] of the spec.
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        check_formats = self.params.get('check_formats')

        def _parse_filter(tokens):
            # Concatenate the raw token strings up to the matching ']'.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Accumulate adjacent name/number/op tokens into one NAME
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser over the token stream; the inside_*
            # flags mark which construct ('+', '/', '(…)') we are nested in.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A bare filter implicitly applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            # Combine two formats (each possibly itself a merge carrying
            # 'requested_formats') into one synthetic format dict.
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                get_no_more = {'video': False, 'audio': False}
                # NOTE(review): formats_info is mutated (pop) while being
                # enumerated; each pop makes the iteration skip the element
                # that slides into position i — confirm this is intentional.
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            # Container choice: user preference, else the single video's
            # (or single audio's) container, else mkv as the safe default.
            output_ext = self.params.get('merge_output_format')
            if not output_ext:
                if the_only_video:
                    output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']
                else:
                    output_ext = 'mkv'

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                'ext': output_ext,
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                })

            return new_dict

        def _check_formats(formats):
            # When --check-formats is enabled, test-download each format to a
            # temporary file and yield only the ones that actually work.
            if not check_formats:
                yield from formats
                return
            for f in formats:
                self.to_screen('[info] Testing format %s' % f['format_id'])
                temp_file = tempfile.NamedTemporaryFile(
                    suffix='.tmp', delete=False,
                    dir=self.get_output_path('temp') or None)
                temp_file.close()
                try:
                    success, _ = self.dl(temp_file.name, f, test=True)
                except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                    success = False
                finally:
                    if os.path.exists(temp_file.name):
                        try:
                            os.remove(temp_file.name)
                        except OSError:
                            self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
                if success:
                    yield f
                else:
                    self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

        def _build_selector_function(selector):
            # Recursively turn the parsed selector tree into a generator
            # function over ctx.
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # First alternative yielding any formats wins
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Deep-copy ctx so each side of '+' selects independently
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        # Fold all formats into one merge, best-first
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # b/w selectors: best/worst, optional video/audio type,
                    # optional '*' modifier, optional '.N' index
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Plain extension or a literal format_id
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply the bracket filters before delegating to the selector
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list with single-token push-back
            # support (restore_last_token), needed by the parser above.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__  # Python 2 compatibility

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1929
1930 def _calc_headers(self, info_dict):
1931 res = std_headers.copy()
1932
1933 add_headers = info_dict.get('http_headers')
1934 if add_headers:
1935 res.update(add_headers)
1936
1937 cookies = self._calc_cookies(info_dict)
1938 if cookies:
1939 res['Cookie'] = cookies
1940
1941 if 'X-Forwarded-For' not in res:
1942 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1943 if x_forwarded_for_ip:
1944 res['X-Forwarded-For'] = x_forwarded_for_ip
1945
1946 return res
1947
1948 def _calc_cookies(self, info_dict):
1949 pr = sanitized_Request(info_dict['url'])
1950 self.cookiejar.add_cookie_header(pr)
1951 return pr.get_header('Cookie')
1952
1953 def _sanitize_thumbnails(self, info_dict):
1954 thumbnails = info_dict.get('thumbnails')
1955 if thumbnails is None:
1956 thumbnail = info_dict.get('thumbnail')
1957 if thumbnail:
1958 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1959 if thumbnails:
1960 thumbnails.sort(key=lambda t: (
1961 t.get('preference') if t.get('preference') is not None else -1,
1962 t.get('width') if t.get('width') is not None else -1,
1963 t.get('height') if t.get('height') is not None else -1,
1964 t.get('id') if t.get('id') is not None else '',
1965 t.get('url')))
1966
1967 def thumbnail_tester():
1968 if self.params.get('check_formats'):
1969 test_all = True
1970 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
1971 else:
1972 test_all = False
1973 to_screen = self.write_debug
1974
1975 def test_thumbnail(t):
1976 if not test_all and not t.get('_test_url'):
1977 return True
1978 to_screen('Testing thumbnail %s' % t['id'])
1979 try:
1980 self.urlopen(HEADRequest(t['url']))
1981 except network_exceptions as err:
1982 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
1983 t['id'], t['url'], error_to_compat_str(err)))
1984 return False
1985 return True
1986
1987 return test_thumbnail
1988
1989 for i, t in enumerate(thumbnails):
1990 if t.get('id') is None:
1991 t['id'] = '%d' % i
1992 if t.get('width') and t.get('height'):
1993 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1994 t['url'] = sanitize_url(t['url'])
1995
1996 if self.params.get('check_formats') is not False:
1997 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
1998 else:
1999 info_dict['thumbnails'] = thumbnails
2000
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extracted video result and select its formats.

        Fills in derived fields (display_id, dates, live status, chapter/
        season/episode titles), normalizes thumbnails, subtitles and the
        'formats' list, runs format selection and — unless a listing-only
        option is set — hands each selected format to process_info.
        Returns the (mutated) info_dict.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field to int (or None)
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # Thumbnails are sorted ascending by preference/size, so the
            # last one is the best
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive the date fields from the corresponding timestamps if missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile 'live_status' with the 'is_live'/'was_live' booleans,
        # deriving whichever side is missing from the other
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and fill in missing extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('No video formats found!')
            else:
                self.report_warning('No video formats found!')

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                # NOTE(review): only bytes URLs are coerced here; other
                # non-str types pass through untouched — confirm intended.
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # format_id -> list of formats sharing that id (for dedup below)
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats and formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # Listing-only options print the requested tables and return early
        list_only = self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')
        if list_only:
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            if self.params.get('list_thumbnails'):
                self.list_thumbnails(info_dict)
            if self.params.get('listformats'):
                if not info_dict.get('formats'):
                    raise ExtractorError('No video formats found', expected=True)
                self.list_formats(info_dict)
            if self.params.get('listsubtitles'):
                if 'automatic_captions' in info_dict:
                    self.list_subtitles(
                        info_dict['id'], automatic_captions, 'automatic captions')
                self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True)
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2256
2257 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2258 """Select the requested subtitles and their format"""
2259 available_subs = {}
2260 if normal_subtitles and self.params.get('writesubtitles'):
2261 available_subs.update(normal_subtitles)
2262 if automatic_captions and self.params.get('writeautomaticsub'):
2263 for lang, cap_info in automatic_captions.items():
2264 if lang not in available_subs:
2265 available_subs[lang] = cap_info
2266
2267 if (not self.params.get('writesubtitles') and not
2268 self.params.get('writeautomaticsub') or not
2269 available_subs):
2270 return None
2271
2272 all_sub_langs = available_subs.keys()
2273 if self.params.get('allsubtitles', False):
2274 requested_langs = all_sub_langs
2275 elif self.params.get('subtitleslangs', False):
2276 requested_langs = set()
2277 for lang in self.params.get('subtitleslangs'):
2278 if lang == 'all':
2279 requested_langs.update(all_sub_langs)
2280 continue
2281 discard = lang[0] == '-'
2282 if discard:
2283 lang = lang[1:]
2284 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2285 if discard:
2286 for lang in current_langs:
2287 requested_langs.discard(lang)
2288 else:
2289 requested_langs.update(current_langs)
2290 elif 'en' in available_subs:
2291 requested_langs = ['en']
2292 else:
2293 requested_langs = [list(all_sub_langs)[0]]
2294 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2295
2296 formats_query = self.params.get('subtitlesformat', 'best')
2297 formats_preference = formats_query.split('/') if formats_query else []
2298 subs = {}
2299 for lang in requested_langs:
2300 formats = available_subs.get(lang)
2301 if formats is None:
2302 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2303 continue
2304 for ext in formats_preference:
2305 if ext == 'best':
2306 f = formats[-1]
2307 break
2308 matches = list(filter(lambda f: f['ext'] == ext, formats))
2309 if matches:
2310 f = matches[-1]
2311 break
2312 else:
2313 f = formats[-1]
2314 self.report_warning(
2315 'No subtitle format found matching "%s" for language %s, '
2316 'using %s' % (formats_query, lang, f['ext']))
2317 subs[lang] = f
2318 return subs
2319
2320 def __forced_printings(self, info_dict, filename, incomplete):
2321 def print_mandatory(field, actual_field=None):
2322 if actual_field is None:
2323 actual_field = field
2324 if (self.params.get('force%s' % field, False)
2325 and (not incomplete or info_dict.get(actual_field) is not None)):
2326 self.to_stdout(info_dict[actual_field])
2327
2328 def print_optional(field):
2329 if (self.params.get('force%s' % field, False)
2330 and info_dict.get(field) is not None):
2331 self.to_stdout(info_dict[field])
2332
2333 info_dict = info_dict.copy()
2334 if filename is not None:
2335 info_dict['filename'] = filename
2336 if info_dict.get('requested_formats') is not None:
2337 # For RTMP URLs, also include the playpath
2338 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2339 elif 'url' in info_dict:
2340 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2341
2342 for tmpl in self.params.get('forceprint', []):
2343 if re.match(r'\w+$', tmpl):
2344 tmpl = '%({})s'.format(tmpl)
2345 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2346 self.to_stdout(tmpl % info_copy)
2347
2348 print_mandatory('title')
2349 print_mandatory('id')
2350 print_mandatory('url', 'urls')
2351 print_optional('thumbnail')
2352 print_optional('description')
2353 print_optional('filename')
2354 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2355 self.to_stdout(formatSeconds(info_dict['duration']))
2356 print_mandatory('format')
2357
2358 if self.params.get('forcejson', False):
2359 self.post_extract(info_dict)
2360 self.to_stdout(json.dumps(info_dict, default=repr))
2361
2362 def dl(self, name, info, subtitle=False, test=False):
2363
2364 if test:
2365 verbose = self.params.get('verbose')
2366 params = {
2367 'test': True,
2368 'quiet': not verbose,
2369 'verbose': verbose,
2370 'noprogress': not verbose,
2371 'nopart': True,
2372 'skip_unavailable_fragments': False,
2373 'keep_fragments': False,
2374 'overwrites': True,
2375 '_no_ytdl_file': True,
2376 }
2377 else:
2378 params = self.params
2379 fd = get_suitable_downloader(info, params)(self, params)
2380 if not test:
2381 for ph in self._progress_hooks:
2382 fd.add_progress_hook(ph)
2383 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2384 self.write_debug('Invoking downloader on "%s"' % urls)
2385 new_info = dict(info)
2386 if new_info.get('http_headers') is None:
2387 new_info['http_headers'] = self._calc_headers(new_info)
2388 return fd.download(name, new_info, subtitle)
2389
2390 def process_info(self, info_dict):
2391 """Process a single resolved IE result."""
2392
2393 assert info_dict.get('_type', 'video') == 'video'
2394
2395 info_dict.setdefault('__postprocessors', [])
2396
2397 max_downloads = self.params.get('max_downloads')
2398 if max_downloads is not None:
2399 if self._num_downloads >= int(max_downloads):
2400 raise MaxDownloadsReached()
2401
2402 # TODO: backward compatibility, to be removed
2403 info_dict['fulltitle'] = info_dict['title']
2404
2405 if 'format' not in info_dict and 'ext' in info_dict:
2406 info_dict['format'] = info_dict['ext']
2407
2408 if self._match_entry(info_dict) is not None:
2409 return
2410
2411 self.post_extract(info_dict)
2412 self._num_downloads += 1
2413
2414 # info_dict['_filename'] needs to be set for backward compatibility
2415 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2416 temp_filename = self.prepare_filename(info_dict, 'temp')
2417 files_to_move = {}
2418
2419 # Forced printings
2420 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2421
2422 if self.params.get('simulate', False):
2423 if self.params.get('force_write_download_archive', False):
2424 self.record_download_archive(info_dict)
2425
2426 # Do nothing else if in simulate mode
2427 return
2428
2429 if full_filename is None:
2430 return
2431
2432 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2433 return
2434 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2435 return
2436
2437 if self.params.get('writedescription', False):
2438 descfn = self.prepare_filename(info_dict, 'description')
2439 if not self._ensure_dir_exists(encodeFilename(descfn)):
2440 return
2441 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2442 self.to_screen('[info] Video description is already present')
2443 elif info_dict.get('description') is None:
2444 self.report_warning('There\'s no description to write.')
2445 else:
2446 try:
2447 self.to_screen('[info] Writing video description to: ' + descfn)
2448 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2449 descfile.write(info_dict['description'])
2450 except (OSError, IOError):
2451 self.report_error('Cannot write description file ' + descfn)
2452 return
2453
2454 if self.params.get('writeannotations', False):
2455 annofn = self.prepare_filename(info_dict, 'annotation')
2456 if not self._ensure_dir_exists(encodeFilename(annofn)):
2457 return
2458 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2459 self.to_screen('[info] Video annotations are already present')
2460 elif not info_dict.get('annotations'):
2461 self.report_warning('There are no annotations to write.')
2462 else:
2463 try:
2464 self.to_screen('[info] Writing video annotations to: ' + annofn)
2465 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2466 annofile.write(info_dict['annotations'])
2467 except (KeyError, TypeError):
2468 self.report_warning('There are no annotations to write.')
2469 except (OSError, IOError):
2470 self.report_error('Cannot write annotations file: ' + annofn)
2471 return
2472
2473 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2474 self.params.get('writeautomaticsub')])
2475
2476 if subtitles_are_requested and info_dict.get('requested_subtitles'):
2477 # subtitles download errors are already managed as troubles in relevant IE
2478 # that way it will silently go on when used with unsupporting IE
2479 subtitles = info_dict['requested_subtitles']
2480 # ie = self.get_info_extractor(info_dict['extractor_key'])
2481 for sub_lang, sub_info in subtitles.items():
2482 sub_format = sub_info['ext']
2483 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2484 sub_filename_final = subtitles_filename(
2485 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2486 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2487 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2488 sub_info['filepath'] = sub_filename
2489 files_to_move[sub_filename] = sub_filename_final
2490 else:
2491 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2492 if sub_info.get('data') is not None:
2493 try:
2494 # Use newline='' to prevent conversion of newline characters
2495 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2496 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2497 subfile.write(sub_info['data'])
2498 sub_info['filepath'] = sub_filename
2499 files_to_move[sub_filename] = sub_filename_final
2500 except (OSError, IOError):
2501 self.report_error('Cannot write subtitles file ' + sub_filename)
2502 return
2503 else:
2504 try:
2505 self.dl(sub_filename, sub_info.copy(), subtitle=True)
2506 sub_info['filepath'] = sub_filename
2507 files_to_move[sub_filename] = sub_filename_final
2508 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2509 self.report_warning('Unable to download subtitle for "%s": %s' %
2510 (sub_lang, error_to_compat_str(err)))
2511 continue
2512
2513 if self.params.get('writeinfojson', False):
2514 infofn = self.prepare_filename(info_dict, 'infojson')
2515 if not self._ensure_dir_exists(encodeFilename(infofn)):
2516 return
2517 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2518 self.to_screen('[info] Video metadata is already present')
2519 else:
2520 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2521 try:
2522 write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2523 except (OSError, IOError):
2524 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2525 return
2526 info_dict['__infojson_filename'] = infofn
2527
2528 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2529 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2530 thumb_filename = replace_extension(
2531 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2532 files_to_move[thumb_filename_temp] = thumb_filename
2533
2534 # Write internet shortcut files
2535 url_link = webloc_link = desktop_link = False
2536 if self.params.get('writelink', False):
2537 if sys.platform == "darwin": # macOS.
2538 webloc_link = True
2539 elif sys.platform.startswith("linux"):
2540 desktop_link = True
2541 else: # if sys.platform in ['win32', 'cygwin']:
2542 url_link = True
2543 if self.params.get('writeurllink', False):
2544 url_link = True
2545 if self.params.get('writewebloclink', False):
2546 webloc_link = True
2547 if self.params.get('writedesktoplink', False):
2548 desktop_link = True
2549
2550 if url_link or webloc_link or desktop_link:
2551 if 'webpage_url' not in info_dict:
2552 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2553 return
2554 ascii_url = iri_to_uri(info_dict['webpage_url'])
2555
2556 def _write_link_file(extension, template, newline, embed_filename):
2557 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2558 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2559 self.to_screen('[info] Internet shortcut is already present')
2560 else:
2561 try:
2562 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2563 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2564 template_vars = {'url': ascii_url}
2565 if embed_filename:
2566 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2567 linkfile.write(template % template_vars)
2568 except (OSError, IOError):
2569 self.report_error('Cannot write internet shortcut ' + linkfn)
2570 return False
2571 return True
2572
2573 if url_link:
2574 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2575 return
2576 if webloc_link:
2577 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2578 return
2579 if desktop_link:
2580 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2581 return
2582
2583 try:
2584 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2585 except PostProcessingError as err:
2586 self.report_error('Preprocessing: %s' % str(err))
2587 return
2588
2589 must_record_download_archive = False
2590 if self.params.get('skip_download', False):
2591 info_dict['filepath'] = temp_filename
2592 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2593 info_dict['__files_to_move'] = files_to_move
2594 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2595 else:
2596 # Download
2597 try:
2598
2599 def existing_file(*filepaths):
2600 ext = info_dict.get('ext')
2601 final_ext = self.params.get('final_ext', ext)
2602 existing_files = []
2603 for file in orderedSet(filepaths):
2604 if final_ext != ext:
2605 converted = replace_extension(file, final_ext, ext)
2606 if os.path.exists(encodeFilename(converted)):
2607 existing_files.append(converted)
2608 if os.path.exists(encodeFilename(file)):
2609 existing_files.append(file)
2610
2611 if not existing_files or self.params.get('overwrites', False):
2612 for file in orderedSet(existing_files):
2613 self.report_file_delete(file)
2614 os.remove(encodeFilename(file))
2615 return None
2616
2617 self.report_file_already_downloaded(existing_files[0])
2618 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2619 return existing_files[0]
2620
2621 success = True
2622 if info_dict.get('requested_formats') is not None:
2623
2624 def compatible_formats(formats):
2625 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2626 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2627 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2628 if len(video_formats) > 2 or len(audio_formats) > 2:
2629 return False
2630
2631 # Check extension
2632 exts = set(format.get('ext') for format in formats)
2633 COMPATIBLE_EXTS = (
2634 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2635 set(('webm',)),
2636 )
2637 for ext_sets in COMPATIBLE_EXTS:
2638 if ext_sets.issuperset(exts):
2639 return True
2640 # TODO: Check acodec/vcodec
2641 return False
2642
2643 requested_formats = info_dict['requested_formats']
2644 old_ext = info_dict['ext']
2645 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2646 info_dict['ext'] = 'mkv'
2647 self.report_warning(
2648 'Requested formats are incompatible for merge and will be merged into mkv.')
2649
2650 def correct_ext(filename):
2651 filename_real_ext = os.path.splitext(filename)[1][1:]
2652 filename_wo_ext = (
2653 os.path.splitext(filename)[0]
2654 if filename_real_ext == old_ext
2655 else filename)
2656 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2657
2658 # Ensure filename always has a correct extension for successful merge
2659 full_filename = correct_ext(full_filename)
2660 temp_filename = correct_ext(temp_filename)
2661 dl_filename = existing_file(full_filename, temp_filename)
2662 info_dict['__real_download'] = False
2663
2664 _protocols = set(determine_protocol(f) for f in requested_formats)
2665 if len(_protocols) == 1:
2666 info_dict['protocol'] = _protocols.pop()
2667 directly_mergable = (
2668 'no-direct-merge' not in self.params.get('compat_opts', [])
2669 and info_dict.get('protocol') is not None # All requested formats have same protocol
2670 and not self.params.get('allow_unplayable_formats')
2671 and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2672 if directly_mergable:
2673 info_dict['url'] = requested_formats[0]['url']
2674 # Treat it as a single download
2675 dl_filename = existing_file(full_filename, temp_filename)
2676 if dl_filename is None:
2677 success, real_download = self.dl(temp_filename, info_dict)
2678 info_dict['__real_download'] = real_download
2679 else:
2680 downloaded = []
2681 merger = FFmpegMergerPP(self)
2682 if self.params.get('allow_unplayable_formats'):
2683 self.report_warning(
2684 'You have requested merging of multiple formats '
2685 'while also allowing unplayable formats to be downloaded. '
2686 'The formats won\'t be merged to prevent data corruption.')
2687 elif not merger.available:
2688 self.report_warning(
2689 'You have requested merging of multiple formats but ffmpeg is not installed. '
2690 'The formats won\'t be merged.')
2691
2692 if dl_filename is None:
2693 for f in requested_formats:
2694 new_info = dict(info_dict)
2695 del new_info['requested_formats']
2696 new_info.update(f)
2697 fname = prepend_extension(
2698 self.prepare_filename(new_info, 'temp'),
2699 'f%s' % f['format_id'], new_info['ext'])
2700 if not self._ensure_dir_exists(fname):
2701 return
2702 downloaded.append(fname)
2703 partial_success, real_download = self.dl(fname, new_info)
2704 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2705 success = success and partial_success
2706 if merger.available and not self.params.get('allow_unplayable_formats'):
2707 info_dict['__postprocessors'].append(merger)
2708 info_dict['__files_to_merge'] = downloaded
2709 # Even if there were no downloads, it is being merged only now
2710 info_dict['__real_download'] = True
2711 else:
2712 for file in downloaded:
2713 files_to_move[file] = None
2714 else:
2715 # Just a single file
2716 dl_filename = existing_file(full_filename, temp_filename)
2717 if dl_filename is None:
2718 success, real_download = self.dl(temp_filename, info_dict)
2719 info_dict['__real_download'] = real_download
2720
2721 dl_filename = dl_filename or temp_filename
2722 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2723
2724 except network_exceptions as err:
2725 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2726 return
2727 except (OSError, IOError) as err:
2728 raise UnavailableVideoError(err)
2729 except (ContentTooShortError, ) as err:
2730 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2731 return
2732
2733 if success and full_filename != '-':
2734
2735 def fixup():
2736 do_fixup = True
2737 fixup_policy = self.params.get('fixup')
2738 vid = info_dict['id']
2739
2740 if fixup_policy in ('ignore', 'never'):
2741 return
2742 elif fixup_policy == 'warn':
2743 do_fixup = False
2744 elif fixup_policy != 'force':
2745 assert fixup_policy in ('detect_or_warn', None)
2746 if not info_dict.get('__real_download'):
2747 do_fixup = False
2748
2749 def ffmpeg_fixup(cndn, msg, cls):
2750 if not cndn:
2751 return
2752 if not do_fixup:
2753 self.report_warning(f'{vid}: {msg}')
2754 return
2755 pp = cls(self)
2756 if pp.available:
2757 info_dict['__postprocessors'].append(pp)
2758 else:
2759 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2760
2761 stretched_ratio = info_dict.get('stretched_ratio')
2762 ffmpeg_fixup(
2763 stretched_ratio not in (1, None),
2764 f'Non-uniform pixel ratio {stretched_ratio}',
2765 FFmpegFixupStretchedPP)
2766
2767 ffmpeg_fixup(
2768 (info_dict.get('requested_formats') is None
2769 and info_dict.get('container') == 'm4a_dash'
2770 and info_dict.get('ext') == 'm4a'),
2771 'writing DASH m4a. Only some players support this container',
2772 FFmpegFixupM4aPP)
2773
2774 downloader = (get_suitable_downloader(info_dict, self.params).__name__
2775 if 'protocol' in info_dict else None)
2776 ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2777 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2778 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2779
2780 fixup()
2781 try:
2782 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2783 except PostProcessingError as err:
2784 self.report_error('Postprocessing: %s' % str(err))
2785 return
2786 try:
2787 for ph in self._post_hooks:
2788 ph(info_dict['filepath'])
2789 except Exception as err:
2790 self.report_error('post hooks: %s' % str(err))
2791 return
2792 must_record_download_archive = True
2793
2794 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2795 self.record_download_archive(info_dict)
2796 max_downloads = self.params.get('max_downloads')
2797 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2798 raise MaxDownloadsReached()
2799
2800 def download(self, url_list):
2801 """Download a given list of URLs."""
2802 outtmpl = self.outtmpl_dict['default']
2803 if (len(url_list) > 1
2804 and outtmpl != '-'
2805 and '%' not in outtmpl
2806 and self.params.get('max_downloads') != 1):
2807 raise SameFileError(outtmpl)
2808
2809 for url in url_list:
2810 try:
2811 # It also downloads the videos
2812 res = self.extract_info(
2813 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2814 except UnavailableVideoError:
2815 self.report_error('unable to download video')
2816 except MaxDownloadsReached:
2817 self.to_screen('[info] Maximum number of downloaded files reached')
2818 raise
2819 except ExistingVideoReached:
2820 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2821 raise
2822 except RejectedVideoReached:
2823 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2824 raise
2825 else:
2826 if self.params.get('dump_single_json', False):
2827 self.post_extract(res)
2828 self.to_stdout(json.dumps(res, default=repr))
2829
2830 return self._download_retcode
2831
2832 def download_with_info_file(self, info_filename):
2833 with contextlib.closing(fileinput.FileInput(
2834 [info_filename], mode='r',
2835 openhook=fileinput.hook_encoded('utf-8'))) as f:
2836 # FileInput doesn't have a read method, we can't call json.load
2837 info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2838 try:
2839 self.process_ie_result(info, download=True)
2840 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2841 webpage_url = info.get('webpage_url')
2842 if webpage_url is not None:
2843 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2844 return self.download([webpage_url])
2845 else:
2846 raise
2847 return self._download_retcode
2848
2849 @staticmethod
2850 def filter_requested_info(info_dict, actually_filter=True):
2851 remove_keys = ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict
2852 keep_keys = ['_type'], # Always keep this to facilitate load-info-json
2853 if actually_filter:
2854 remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2855 empty_values = (None, {}, [], set(), tuple())
2856 reject = lambda k, v: k not in keep_keys and (
2857 k.startswith('_') or k in remove_keys or v in empty_values)
2858 else:
2859 info_dict['epoch'] = int(time.time())
2860 reject = lambda k, v: k in remove_keys
2861 filter_fn = lambda obj: (
2862 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2863 else obj if not isinstance(obj, dict)
2864 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2865 return filter_fn(info_dict)
2866
2867 def run_pp(self, pp, infodict):
2868 files_to_delete = []
2869 if '__files_to_move' not in infodict:
2870 infodict['__files_to_move'] = {}
2871 files_to_delete, infodict = pp.run(infodict)
2872 if not files_to_delete:
2873 return infodict
2874
2875 if self.params.get('keepvideo', False):
2876 for f in files_to_delete:
2877 infodict['__files_to_move'].setdefault(f, '')
2878 else:
2879 for old_filename in set(files_to_delete):
2880 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2881 try:
2882 os.remove(encodeFilename(old_filename))
2883 except (IOError, OSError):
2884 self.report_warning('Unable to remove downloaded original file')
2885 if old_filename in infodict['__files_to_move']:
2886 del infodict['__files_to_move'][old_filename]
2887 return infodict
2888
2889 @staticmethod
2890 def post_extract(info_dict):
2891 def actual_post_extract(info_dict):
2892 if info_dict.get('_type') in ('playlist', 'multi_video'):
2893 for video_dict in info_dict.get('entries', {}):
2894 actual_post_extract(video_dict or {})
2895 return
2896
2897 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2898 extra = post_extractor().items()
2899 info_dict.update(extra)
2900 info_dict.pop('__post_extractor', None)
2901
2902 original_infodict = info_dict.get('__original_infodict') or {}
2903 original_infodict.update(extra)
2904 original_infodict.pop('__post_extractor', None)
2905
2906 actual_post_extract(info_dict or {})
2907
2908 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2909 info = dict(ie_info)
2910 info['__files_to_move'] = files_to_move or {}
2911 for pp in self._pps[key]:
2912 info = self.run_pp(pp, info)
2913 return info, info.pop('__files_to_move', None)
2914
2915 def post_process(self, filename, ie_info, files_to_move=None):
2916 """Run all the postprocessors on the given file."""
2917 info = dict(ie_info)
2918 info['filepath'] = filename
2919 info['__files_to_move'] = files_to_move or {}
2920
2921 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2922 info = self.run_pp(pp, info)
2923 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2924 del info['__files_to_move']
2925 for pp in self._pps['after_move']:
2926 info = self.run_pp(pp, info)
2927 return info
2928
2929 def _make_archive_id(self, info_dict):
2930 video_id = info_dict.get('id')
2931 if not video_id:
2932 return
2933 # Future-proof against any change in case
2934 # and backwards compatibility with prior versions
2935 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2936 if extractor is None:
2937 url = str_or_none(info_dict.get('url'))
2938 if not url:
2939 return
2940 # Try to find matching extractor for the URL and take its ie_key
2941 for ie in self._ies:
2942 if ie.suitable(url):
2943 extractor = ie.ie_key()
2944 break
2945 else:
2946 return
2947 return '%s %s' % (extractor.lower(), video_id)
2948
2949 def in_download_archive(self, info_dict):
2950 fn = self.params.get('download_archive')
2951 if fn is None:
2952 return False
2953
2954 vid_id = self._make_archive_id(info_dict)
2955 if not vid_id:
2956 return False # Incomplete video information
2957
2958 return vid_id in self.archive
2959
2960 def record_download_archive(self, info_dict):
2961 fn = self.params.get('download_archive')
2962 if fn is None:
2963 return
2964 vid_id = self._make_archive_id(info_dict)
2965 assert vid_id
2966 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2967 archive_file.write(vid_id + '\n')
2968 self.archive.add(vid_id)
2969
2970 @staticmethod
2971 def format_resolution(format, default='unknown'):
2972 if format.get('vcodec') == 'none':
2973 if format.get('acodec') == 'none':
2974 return 'images'
2975 return 'audio only'
2976 if format.get('resolution') is not None:
2977 return format['resolution']
2978 if format.get('width') and format.get('height'):
2979 res = '%dx%d' % (format['width'], format['height'])
2980 elif format.get('height'):
2981 res = '%sp' % format['height']
2982 elif format.get('width'):
2983 res = '%dx?' % format['width']
2984 else:
2985 res = default
2986 return res
2987
2988 def _format_note(self, fdict):
2989 res = ''
2990 if fdict.get('ext') in ['f4f', 'f4m']:
2991 res += '(unsupported) '
2992 if fdict.get('language'):
2993 if res:
2994 res += ' '
2995 res += '[%s] ' % fdict['language']
2996 if fdict.get('format_note') is not None:
2997 res += fdict['format_note'] + ' '
2998 if fdict.get('tbr') is not None:
2999 res += '%4dk ' % fdict['tbr']
3000 if fdict.get('container') is not None:
3001 if res:
3002 res += ', '
3003 res += '%s container' % fdict['container']
3004 if (fdict.get('vcodec') is not None
3005 and fdict.get('vcodec') != 'none'):
3006 if res:
3007 res += ', '
3008 res += fdict['vcodec']
3009 if fdict.get('vbr') is not None:
3010 res += '@'
3011 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3012 res += 'video@'
3013 if fdict.get('vbr') is not None:
3014 res += '%4dk' % fdict['vbr']
3015 if fdict.get('fps') is not None:
3016 if res:
3017 res += ', '
3018 res += '%sfps' % fdict['fps']
3019 if fdict.get('acodec') is not None:
3020 if res:
3021 res += ', '
3022 if fdict['acodec'] == 'none':
3023 res += 'video only'
3024 else:
3025 res += '%-5s' % fdict['acodec']
3026 elif fdict.get('abr') is not None:
3027 if res:
3028 res += ', '
3029 res += 'audio'
3030 if fdict.get('abr') is not None:
3031 res += '@%3dk' % fdict['abr']
3032 if fdict.get('asr') is not None:
3033 res += ' (%5dHz)' % fdict['asr']
3034 if fdict.get('filesize') is not None:
3035 if res:
3036 res += ', '
3037 res += format_bytes(fdict['filesize'])
3038 elif fdict.get('filesize_approx') is not None:
3039 if res:
3040 res += ', '
3041 res += '~' + format_bytes(fdict['filesize_approx'])
3042 return res
3043
    def list_formats(self, info_dict):
        """Print the available formats for a video as a table to stdout.

        Uses the multi-column yt-dlp layout unless the 'list-formats'
        compat-opt or listformats_table=False selects the legacy
        youtube-dl style. Formats with preference below -1000 are hidden.
        """
        formats = info_dict.get('formats', [info_dict])
        new_format = (
            'list-formats' not in self.params.get('compat_opts', [])
            and self.params.get('listformats_table', True) is not False)
        if new_format:
            # One row per format: id/ext/res/fps | size/tbr/proto | codecs/bitrates | extra notes
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    format_field(f, 'fps', '%d'),
                    '|',
                    format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                    format_field(f, 'tbr', '%4dk'),
                    shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                    '|',
                    format_field(f, 'vcodec', default='unknown').replace('none', ''),
                    format_field(f, 'vbr', '%4dk'),
                    format_field(f, 'acodec', default='unknown').replace('none', ''),
                    format_field(f, 'abr', '%3dk'),
                    format_field(f, 'asr', '%5dHz'),
                    ', '.join(filter(None, (
                        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
                        format_field(f, 'language', '[%s]'),
                        format_field(f, 'format_note'),
                        format_field(f, 'container', ignore=(None, f.get('ext'))),
                        format_field(f, 'asr', '%5dHz')))),
                ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                           '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
        else:
            # Legacy 4-column layout with the note built by _format_note
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        self.to_screen(
            '[info] Available formats for %s:' % info_dict['id'])
        self.to_stdout(render_table(
            header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3090
3091 def list_thumbnails(self, info_dict):
3092 thumbnails = list(info_dict.get('thumbnails'))
3093 if not thumbnails:
3094 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3095 return
3096
3097 self.to_screen(
3098 '[info] Thumbnails for %s:' % info_dict['id'])
3099 self.to_stdout(render_table(
3100 ['ID', 'width', 'height', 'URL'],
3101 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3102
3103 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3104 if not subtitles:
3105 self.to_screen('%s has no %s' % (video_id, name))
3106 return
3107 self.to_screen(
3108 'Available %s for %s:' % (name, video_id))
3109
3110 def _row(lang, formats):
3111 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3112 if len(set(names)) == 1:
3113 names = [] if names[0] == 'unknown' else names[:1]
3114 return [lang, ', '.join(names), ', '.join(exts)]
3115
3116 self.to_stdout(render_table(
3117 ['Language', 'Name', 'Formats'],
3118 [_row(lang, formats) for lang, formats in subtitles.items()],
3119 hideEmpty=True))
3120
3121 def urlopen(self, req):
3122 """ Start an HTTP download """
3123 if isinstance(req, compat_basestring):
3124 req = sanitized_Request(req)
3125 return self._opener.open(req, timeout=self._socket_timeout)
3126
    def print_debug_header(self):
        """Write verbose debug information (versions, encodings, proxies) when --verbose is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        # How yt-dlp was installed: frozen exe, zip bundle, or plain source checkout
        source = (
            '(exe)' if hasattr(sys, 'frozen')
            else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
            else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
            else '')
        self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        if self.params.get('compat_opts'):
            self._write_string(
                '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
        # Best-effort: report the git commit when running from a checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            try:
                # Python 2 only; a no-op (NameError swallowed) on Python 3
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # e.g. 'CPython', or 'PyPy version x.y.z' with the PyPy version appended
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # --call-home: report the public IP address
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            return
            # NOTE(review): the code below appears intentionally unreachable —
            # the youtube-dl update check kept from upstream, disabled by the
            # `return` above; confirm before removing
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3216
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, TLS and custom handlers) used for all HTTP requests.

        Sets self.cookiejar, self._socket_timeout and self._opener as side effects.
        """
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # --proxy "" explicitly disables all proxies
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # No --proxy given: fall back to the environment's proxy settings
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
3264
3265 def encode(self, s):
3266 if isinstance(s, bytes):
3267 return s # Already encoded
3268
3269 try:
3270 return s.encode(self.get_encoding())
3271 except UnicodeEncodeError as err:
3272 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3273 raise
3274
3275 def get_encoding(self):
3276 encoding = self.params.get('encoding')
3277 if encoding is None:
3278 encoding = preferredencoding()
3279 return encoding
3280
    def _write_thumbnails(self, info_dict, filename):  # return the extensions
        """Download the video thumbnail(s) next to *filename*.

        Returns the list of extensions (with the thumbnail-id suffix when
        several are written) that were written or already present.
        """
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails = []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
        # Only disambiguate filenames by thumbnail id when writing several
        multiple = write_all and len(thumbnails) > 1

        ret = []
        # Iterates the list in reverse — presumably highest-preference first; confirm ordering upstream
        for t in thumbnails[::-1]:
            thumb_ext = determine_ext(t['url'], 'jpg')
            suffix = '%s.' % t['id'] if multiple else ''
            thumb_display_id = '%s ' % t['id'] if multiple else ''
            thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                # Existing file counts as success without re-downloading
                ret.append(suffix + thumb_ext)
                t['filepath'] = thumb_filename
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append(suffix + thumb_ext)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    # Thumbnail failures are non-fatal; warn and move on
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))
            # Unless --write-all-thumbnails, stop after the first one that succeeds
            if ret and not write_all:
                break
        return ret